From 47661ee1821fc3a6b5ae07aac37410d6ccada976 Mon Sep 17 00:00:00 2001 From: Thierry Reding Date: Fri, 18 Jun 2021 13:18:46 +0200 Subject: [PATCH 001/714] memory: tegra: Add compile-test stub for tegra_mc_probe_device() The tegra_mc_probe_device() symbol is only available when the TEGRA_MC Kconfig option is enabled. Provide a stub if that's not the case so that the driver can be compile-tested. Reported-by: kernel test robot Signed-off-by: Thierry Reding Link: https://lore.kernel.org/r/20210618111846.1286166-1-thierry.reding@gmail.com Signed-off-by: Krzysztof Kozlowski --- include/soc/tegra/mc.h | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/include/soc/tegra/mc.h b/include/soc/tegra/mc.h index e19c2504a14bd5..1066b1194a5a9a 100644 --- a/include/soc/tegra/mc.h +++ b/include/soc/tegra/mc.h @@ -237,14 +237,19 @@ unsigned int tegra_mc_get_emem_device_count(struct tegra_mc *mc); #ifdef CONFIG_TEGRA_MC struct tegra_mc *devm_tegra_memory_controller_get(struct device *dev); +int tegra_mc_probe_device(struct tegra_mc *mc, struct device *dev); #else static inline struct tegra_mc * devm_tegra_memory_controller_get(struct device *dev) { return ERR_PTR(-ENODEV); } -#endif -int tegra_mc_probe_device(struct tegra_mc *mc, struct device *dev); +static inline int +tegra_mc_probe_device(struct tegra_mc *mc, struct device *dev) +{ + return -ENODEV; +} +#endif #endif /* __SOC_TEGRA_MC_H__ */ From bf3ec9deaa33889630722c47f7bb86ba58872ea7 Mon Sep 17 00:00:00 2001 From: Thierry Reding Date: Mon, 21 Jun 2021 16:00:36 +0200 Subject: [PATCH 002/714] dt-bindings: arm-smmu: Fix json-schema syntax Commit 4287861dca9d ("dt-bindings: arm-smmu: Add Tegra186 compatible string") introduced a jsonschema syntax error as a result of a rebase gone wrong. Fix it. Fixes: 4287861dca9d ("dt-bindings: arm-smmu: Add Tegra186 compatible string") Reported-by: Rob Herring Signed-off-by: Thierry Reding Acked-by: Rob Herring Link: https://lore.kernel.org/r/20210621140036.2879563-1-thierry.reding@gmail.com Signed-off-by: Krzysztof Kozlowski --- Documentation/devicetree/bindings/iommu/arm,smmu.yaml | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/Documentation/devicetree/bindings/iommu/arm,smmu.yaml b/Documentation/devicetree/bindings/iommu/arm,smmu.yaml index 1181b590db7149..03f2b2d4db3089 100644 --- a/Documentation/devicetree/bindings/iommu/arm,smmu.yaml +++ b/Documentation/devicetree/bindings/iommu/arm,smmu.yaml @@ -52,16 +52,14 @@ properties: items: - const: marvell,ap806-smmu-500 - const: arm,mmu-500 - - description: NVIDIA SoCs that program two ARM MMU-500s identically - items: - description: NVIDIA SoCs that require memory controller interaction and may program multiple ARM MMU-500s identically with the memory controller interleaving translations between multiple instances for improved performance. items: - enum: - - const: nvidia,tegra194-smmu - - const: nvidia,tegra186-smmu + - nvidia,tegra194-smmu + - nvidia,tegra186-smmu - const: nvidia,smmu-500 - items: - const: arm,mmu-500 From cc3ddee97cff034cea4d095de4a484c92a219bf5 Mon Sep 17 00:00:00 2001 From: Hans de Goede Date: Thu, 21 Jan 2021 10:08:59 +0100 Subject: [PATCH 003/714] vboxsf: Honor excl flag to the dir-inode create op Honor the excl flag to the dir-inode create op, instead of behaving as if it is always set. Note the old behavior still worked most of the time since a non-exclusive open only calls the create op, if there is a race and the file is created between the dentry lookup and the calling of the create call. While at it change the type of the is_dir parameter to the vboxsf_dir_create() helper from an int to a bool, to be consistent with the use of bool for the excl parameter. Fixes: 0fd169576648 ("fs: Add VirtualBox guest shared folder (vboxsf) support") Signed-off-by: Hans de Goede --- fs/vboxsf/dir.c | 16 +++++++++------- 1 file changed, 9 insertions(+), 7 deletions(-) diff --git a/fs/vboxsf/dir.c b/fs/vboxsf/dir.c index eac6788fc6cffc..6ff84343edcad1 100644 --- a/fs/vboxsf/dir.c +++ b/fs/vboxsf/dir.c @@ -253,7 +253,7 @@ static int vboxsf_dir_instantiate(struct inode *parent, struct dentry *dentry, } static int vboxsf_dir_create(struct inode *parent, struct dentry *dentry, - umode_t mode, int is_dir) + umode_t mode, bool is_dir, bool excl) { struct vboxsf_inode *sf_parent_i = VBOXSF_I(parent); struct vboxsf_sbi *sbi = VBOXSF_SBI(parent->i_sb); @@ -261,10 +261,12 @@ static int vboxsf_dir_create(struct inode *parent, struct dentry *dentry, int err; params.handle = SHFL_HANDLE_NIL; - params.create_flags = SHFL_CF_ACT_CREATE_IF_NEW | - SHFL_CF_ACT_FAIL_IF_EXISTS | - SHFL_CF_ACCESS_READWRITE | - (is_dir ? SHFL_CF_DIRECTORY : 0); + params.create_flags = SHFL_CF_ACT_CREATE_IF_NEW | SHFL_CF_ACCESS_READWRITE; + if (is_dir) + params.create_flags |= SHFL_CF_DIRECTORY; + if (excl) + params.create_flags |= SHFL_CF_ACT_FAIL_IF_EXISTS; + params.info.attr.mode = (mode & 0777) | (is_dir ? SHFL_TYPE_DIRECTORY : SHFL_TYPE_FILE); params.info.attr.additional = SHFLFSOBJATTRADD_NOTHING; @@ -292,14 +294,14 @@ static int vboxsf_dir_mkfile(struct user_namespace *mnt_userns, struct inode *parent, struct dentry *dentry, umode_t mode, bool excl) { - return vboxsf_dir_create(parent, dentry, mode, 0); + return vboxsf_dir_create(parent, dentry, mode, false, excl); } static int vboxsf_dir_mkdir(struct user_namespace *mnt_userns, struct inode *parent, struct dentry *dentry, umode_t mode) { - return vboxsf_dir_create(parent, dentry, mode, 1); + return vboxsf_dir_create(parent, dentry, mode, true, true); } static int vboxsf_dir_unlink(struct inode *parent, struct dentry *dentry) From ab0c29687bc7a890d1a86ac376b0b0fd78b2d9b6 Mon Sep 17 00:00:00 2001 From: Hans de Goede Date: Thu, 21 Jan 2021 10:22:27 +0100 Subject: [PATCH 004/714] vboxsf: Make vboxsf_dir_create() return the handle for the created file Make vboxsf_dir_create() optionally return the vboxsf-handle for the created file. This is a preparation patch for adding atomic_open support. Fixes: 0fd169576648 ("fs: Add VirtualBox guest shared folder (vboxsf) support") Signed-off-by: Hans de Goede --- fs/vboxsf/dir.c | 18 +++++++++++------- 1 file changed, 11 insertions(+), 7 deletions(-) diff --git a/fs/vboxsf/dir.c b/fs/vboxsf/dir.c index 6ff84343edcad1..87d5b2fef592bb 100644 --- a/fs/vboxsf/dir.c +++ b/fs/vboxsf/dir.c @@ -253,7 +253,7 @@ static int vboxsf_dir_instantiate(struct inode *parent, struct dentry *dentry, } static int vboxsf_dir_create(struct inode *parent, struct dentry *dentry, - umode_t mode, bool is_dir, bool excl) + umode_t mode, bool is_dir, bool excl, u64 *handle_ret) { struct vboxsf_inode *sf_parent_i = VBOXSF_I(parent); struct vboxsf_sbi *sbi = VBOXSF_SBI(parent->i_sb); @@ -278,30 +278,34 @@ static int vboxsf_dir_create(struct inode *parent, struct dentry *dentry, if (params.result != SHFL_FILE_CREATED) return -EPERM; - vboxsf_close(sbi->root, params.handle); - err = vboxsf_dir_instantiate(parent, dentry, ¶ms.info); if (err) - return err; + goto out; /* parent directory access/change time changed */ sf_parent_i->force_restat = 1; - return 0; +out: + if (err == 0 && handle_ret) + *handle_ret = params.handle; + else + vboxsf_close(sbi->root, params.handle); + + return err; } static int vboxsf_dir_mkfile(struct user_namespace *mnt_userns, struct inode *parent, struct dentry *dentry, umode_t mode, bool excl) { - return vboxsf_dir_create(parent, dentry, mode, false, excl); + return vboxsf_dir_create(parent, dentry, mode, false, excl, NULL); } static int vboxsf_dir_mkdir(struct user_namespace *mnt_userns, struct inode *parent, struct dentry *dentry, umode_t mode) { - return vboxsf_dir_create(parent, dentry, mode, true, true); + return vboxsf_dir_create(parent, dentry, mode, true, true, NULL); } static int vboxsf_dir_unlink(struct inode *parent, struct dentry *dentry) From 02f840f90764f22f5c898901849bdbf0cee752ba Mon Sep 17 00:00:00 2001 From: Hans de Goede Date: Thu, 21 Jan 2021 10:55:03 +0100 Subject: [PATCH 005/714] vboxsf: Add vboxsf_[create|release]_sf_handle() helpers Factor out the code to create / release a struct vboxsf_handle into 2 new helper functions. This is a preparation patch for adding atomic_open support. Fixes: 0fd169576648 ("fs: Add VirtualBox guest shared folder (vboxsf) support") Signed-off-by: Hans de Goede --- fs/vboxsf/file.c | 71 ++++++++++++++++++++++++++++------------------ fs/vboxsf/vfsmod.h | 7 +++++ 2 files changed, 51 insertions(+), 27 deletions(-) diff --git a/fs/vboxsf/file.c b/fs/vboxsf/file.c index c4ab5996d97a83..864c2fad23beb3 100644 --- a/fs/vboxsf/file.c +++ b/fs/vboxsf/file.c @@ -20,17 +20,39 @@ struct vboxsf_handle { struct list_head head; }; -static int vboxsf_file_open(struct inode *inode, struct file *file) +struct vboxsf_handle *vboxsf_create_sf_handle(struct inode *inode, + u64 handle, u32 access_flags) { struct vboxsf_inode *sf_i = VBOXSF_I(inode); - struct shfl_createparms params = {}; struct vboxsf_handle *sf_handle; - u32 access_flags = 0; - int err; sf_handle = kmalloc(sizeof(*sf_handle), GFP_KERNEL); if (!sf_handle) - return -ENOMEM; + return ERR_PTR(-ENOMEM); + + /* the host may have given us different attr then requested */ + sf_i->force_restat = 1; + + /* init our handle struct and add it to the inode's handles list */ + sf_handle->handle = handle; + sf_handle->root = VBOXSF_SBI(inode->i_sb)->root; + sf_handle->access_flags = access_flags; + kref_init(&sf_handle->refcount); + + mutex_lock(&sf_i->handle_list_mutex); + list_add(&sf_handle->head, &sf_i->handle_list); + mutex_unlock(&sf_i->handle_list_mutex); + + return sf_handle; +} + +static int vboxsf_file_open(struct inode *inode, struct file *file) +{ + struct vboxsf_sbi *sbi = VBOXSF_SBI(inode->i_sb); + struct shfl_createparms params = {}; + struct vboxsf_handle *sf_handle; + u32 access_flags = 0; + int err; /* * We check the value of params.handle afterwards to find out if @@ -83,23 +105,14 @@ static int vboxsf_file_open(struct inode *inode, struct file *file) err = vboxsf_create_at_dentry(file_dentry(file), ¶ms); if (err == 0 && params.handle == SHFL_HANDLE_NIL) err = (params.result == SHFL_FILE_EXISTS) ? -EEXIST : -ENOENT; - if (err) { - kfree(sf_handle); + if (err) return err; - } - - /* the host may have given us different attr then requested */ - sf_i->force_restat = 1; - /* init our handle struct and add it to the inode's handles list */ - sf_handle->handle = params.handle; - sf_handle->root = VBOXSF_SBI(inode->i_sb)->root; - sf_handle->access_flags = access_flags; - kref_init(&sf_handle->refcount); - - mutex_lock(&sf_i->handle_list_mutex); - list_add(&sf_handle->head, &sf_i->handle_list); - mutex_unlock(&sf_i->handle_list_mutex); + sf_handle = vboxsf_create_sf_handle(inode, params.handle, access_flags); + if (IS_ERR(sf_handle)) { + vboxsf_close(sbi->root, params.handle); + return PTR_ERR(sf_handle); + } file->private_data = sf_handle; return 0; @@ -114,22 +127,26 @@ static void vboxsf_handle_release(struct kref *refcount) kfree(sf_handle); } -static int vboxsf_file_release(struct inode *inode, struct file *file) +void vboxsf_release_sf_handle(struct inode *inode, struct vboxsf_handle *sf_handle) { struct vboxsf_inode *sf_i = VBOXSF_I(inode); - struct vboxsf_handle *sf_handle = file->private_data; + mutex_lock(&sf_i->handle_list_mutex); + list_del(&sf_handle->head); + mutex_unlock(&sf_i->handle_list_mutex); + + kref_put(&sf_handle->refcount, vboxsf_handle_release); +} + +static int vboxsf_file_release(struct inode *inode, struct file *file) +{ /* * When a file is closed on our (the guest) side, we want any subsequent * accesses done on the host side to see all changes done from our side. */ filemap_write_and_wait(inode->i_mapping); - mutex_lock(&sf_i->handle_list_mutex); - list_del(&sf_handle->head); - mutex_unlock(&sf_i->handle_list_mutex); - - kref_put(&sf_handle->refcount, vboxsf_handle_release); + vboxsf_release_sf_handle(inode, file->private_data); return 0; } diff --git a/fs/vboxsf/vfsmod.h b/fs/vboxsf/vfsmod.h index 6a7a9cedebc6e1..9047befa66c5a8 100644 --- a/fs/vboxsf/vfsmod.h +++ b/fs/vboxsf/vfsmod.h @@ -18,6 +18,8 @@ #define VBOXSF_SBI(sb) ((struct vboxsf_sbi *)(sb)->s_fs_info) #define VBOXSF_I(i) container_of(i, struct vboxsf_inode, vfs_inode) +struct vboxsf_handle; + struct vboxsf_options { unsigned long ttl; kuid_t uid; @@ -80,6 +82,11 @@ extern const struct file_operations vboxsf_reg_fops; extern const struct address_space_operations vboxsf_reg_aops; extern const struct dentry_operations vboxsf_dentry_ops; +/* from file.c */ +struct vboxsf_handle *vboxsf_create_sf_handle(struct inode *inode, + u64 handle, u32 access_flags); +void vboxsf_release_sf_handle(struct inode *inode, struct vboxsf_handle *sf_handle); + /* from utils.c */ struct inode *vboxsf_new_inode(struct super_block *sb); int vboxsf_init_inode(struct vboxsf_sbi *sbi, struct inode *inode, From 52dfd86aa568e433b24357bb5fc725560f1e22d8 Mon Sep 17 00:00:00 2001 From: Hans de Goede Date: Thu, 21 Jan 2021 12:54:18 +0100 Subject: [PATCH 006/714] vboxsf: Add support for the atomic_open directory-inode op Opening a new file is done in 2 steps on regular filesystems: 1. Call the create inode-op on the parent-dir to create an inode to hold the meta-data related to the file. 2. Call the open file-op to get a handle for the file. vboxsf however does not really use disk-backed inodes because it is based on passing through file-related system-calls through to the hypervisor. So both steps translate to an open(2) call being passed through to the hypervisor. With the handle returned by the first call immediately being closed again. Making 2 open calls for a single open(..., O_CREATE, ...) calls has 2 problems: a) It is not really efficient. b) It actually breaks some apps. An example of b) is doing a git clone inside a vboxsf mount. When git clone tries to create a tempfile to store the pak files which is downloading the following happens: 1. vboxsf_dir_mkfile() gets called with a mode of 0444 and succeeds. 2. vboxsf_file_open() gets called with file->f_flags containing O_RDWR. When the host is a Linux machine this fails because doing a open(..., O_RDWR) on a file which exists and has mode 0444 results in an -EPERM error. Other network-filesystems and fuse avoid the problem of needing to pass 2 open() calls to the other side by using the atomic_open directory-inode op. This commit fixes git clone not working inside a vboxsf mount, by adding support for the atomic_open directory-inode op. As an added bonus this should also make opening new files faster. The atomic_open implementation is modelled after the atomic_open implementations from the 9p and fuse code. Fixes: 0fd169576648 ("fs: Add VirtualBox guest shared folder (vboxsf) support") Reported-by: Ludovic Pouzenc Signed-off-by: Hans de Goede --- fs/vboxsf/dir.c | 48 ++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 48 insertions(+) diff --git a/fs/vboxsf/dir.c b/fs/vboxsf/dir.c index 87d5b2fef592bb..c4769a9396c504 100644 --- a/fs/vboxsf/dir.c +++ b/fs/vboxsf/dir.c @@ -308,6 +308,53 @@ static int vboxsf_dir_mkdir(struct user_namespace *mnt_userns, return vboxsf_dir_create(parent, dentry, mode, true, true, NULL); } +static int vboxsf_dir_atomic_open(struct inode *parent, struct dentry *dentry, + struct file *file, unsigned int flags, umode_t mode) +{ + struct vboxsf_sbi *sbi = VBOXSF_SBI(parent->i_sb); + struct vboxsf_handle *sf_handle; + struct dentry *res = NULL; + u64 handle; + int err; + + if (d_in_lookup(dentry)) { + res = vboxsf_dir_lookup(parent, dentry, 0); + if (IS_ERR(res)) + return PTR_ERR(res); + + if (res) + dentry = res; + } + + /* Only creates */ + if (!(flags & O_CREAT) || d_really_is_positive(dentry)) + return finish_no_open(file, res); + + err = vboxsf_dir_create(parent, dentry, mode, false, flags & O_EXCL, &handle); + if (err) + goto out; + + sf_handle = vboxsf_create_sf_handle(d_inode(dentry), handle, SHFL_CF_ACCESS_READWRITE); + if (IS_ERR(sf_handle)) { + vboxsf_close(sbi->root, handle); + err = PTR_ERR(sf_handle); + goto out; + } + + err = finish_open(file, dentry, generic_file_open); + if (err) { + /* This also closes the handle passed to vboxsf_create_sf_handle() */ + vboxsf_release_sf_handle(d_inode(dentry), sf_handle); + goto out; + } + + file->private_data = sf_handle; + file->f_mode |= FMODE_CREATED; +out: + dput(res); + return err; +} + static int vboxsf_dir_unlink(struct inode *parent, struct dentry *dentry) { struct vboxsf_sbi *sbi = VBOXSF_SBI(parent->i_sb); @@ -428,6 +475,7 @@ const struct inode_operations vboxsf_dir_iops = { .lookup = vboxsf_dir_lookup, .create = vboxsf_dir_mkfile, .mkdir = vboxsf_dir_mkdir, + .atomic_open = vboxsf_dir_atomic_open, .rmdir = vboxsf_dir_unlink, .unlink = vboxsf_dir_unlink, .rename = vboxsf_dir_rename, From ca46ad2214473df1a6a9496be17156d65ba89b9f Mon Sep 17 00:00:00 2001 From: Joel Stanley Date: Thu, 24 Jun 2021 18:37:42 +0930 Subject: [PATCH 007/714] ARM: dts: aspeed: Fix AST2600 machines line names Tacoma and Rainier both have a line-names array that is too long: gpio gpiochip0: gpio-line-names is length 232 but should be at most length 208 This was probably copied from an AST2500 device tree that did have more GPIOs on the controller. Fixes: e9b24b55ca4f ("ARM: dts: aspeed: rainier: Add gpio line names") Fixes: 2f68e4e7df67 ("ARM: dts: aspeed: tacoma: Add gpio line names") Link: https://lore.kernel.org/r/20210624090742.56640-1-joel@jms.id.au Signed-off-by: Joel Stanley --- arch/arm/boot/dts/aspeed-bmc-ibm-rainier.dts | 5 +---- arch/arm/boot/dts/aspeed-bmc-opp-tacoma.dts | 5 +---- 2 files changed, 2 insertions(+), 8 deletions(-) diff --git a/arch/arm/boot/dts/aspeed-bmc-ibm-rainier.dts b/arch/arm/boot/dts/aspeed-bmc-ibm-rainier.dts index 941c0489479ac3..481d0ee1f85fbc 100644 --- a/arch/arm/boot/dts/aspeed-bmc-ibm-rainier.dts +++ b/arch/arm/boot/dts/aspeed-bmc-ibm-rainier.dts @@ -280,10 +280,7 @@ /*W0-W7*/ "","","","","","","","", /*X0-X7*/ "","","","","","","","", /*Y0-Y7*/ "","","","","","","","", - /*Z0-Z7*/ "","","","","","","","", - /*AA0-AA7*/ "","","","","","","","", - /*AB0-AB7*/ "","","","","","","","", - /*AC0-AC7*/ "","","","","","","",""; + /*Z0-Z7*/ "","","","","","","",""; pin_mclr_vpp { gpio-hog; diff --git a/arch/arm/boot/dts/aspeed-bmc-opp-tacoma.dts b/arch/arm/boot/dts/aspeed-bmc-opp-tacoma.dts index e863ec08897071..48038b2ef3df29 100644 --- a/arch/arm/boot/dts/aspeed-bmc-opp-tacoma.dts +++ b/arch/arm/boot/dts/aspeed-bmc-opp-tacoma.dts @@ -136,10 +136,7 @@ /*W0-W7*/ "","","","","","","","", /*X0-X7*/ "","","","","","","","", /*Y0-Y7*/ "","","","","","","","", - /*Z0-Z7*/ "","","","","","","","", - /*AA0-AA7*/ "","","","","","","","", - /*AB0-AB7*/ "","","","","","","","", - /*AC0-AC7*/ "","","","","","","",""; + /*Z0-Z7*/ "","","","","","","",""; }; &fmc { From 33c8516841ea4fa12fdb8961711bf95095c607ee Mon Sep 17 00:00:00 2001 From: Rander Wang Date: Fri, 25 Jun 2021 15:50:39 -0500 Subject: [PATCH 008/714] ASoC: Intel: boards: fix xrun issue on platform with max98373 MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit On TGL platform with max98373 codec the trigger start sequence is fe first, then codec component and sdw link is the last. Recently a delay was introduced in max98373 codec driver and this resulted to the start of sdw stream transmission was delayed and the data transmitted by fw can't be consumed by sdw controller, so xrun happened. Adding delay in trigger function is a bad idea. This patch enable spk pin in prepare function and disable it in hw_free to avoid xrun issue caused by delay in trigger. Fixes: 3a27875e91fb ("ASoC: max98373: Added 30ms turn on/off time delay") BugLink: https://github.com/thesofproject/sof/issues/4066 Reviewed-by: Bard Liao Reviewed-by: Péter Ujfalusi Signed-off-by: Rander Wang Signed-off-by: Pierre-Louis Bossart Link: https://lore.kernel.org/r/20210625205042.65181-2-pierre-louis.bossart@linux.intel.com Signed-off-by: Mark Brown --- sound/soc/intel/boards/sof_sdw_max98373.c | 81 +++++++++++++++-------- 1 file changed, 53 insertions(+), 28 deletions(-) diff --git a/sound/soc/intel/boards/sof_sdw_max98373.c b/sound/soc/intel/boards/sof_sdw_max98373.c index 0e7ed906b34177..25daef910aee18 100644 --- a/sound/soc/intel/boards/sof_sdw_max98373.c +++ b/sound/soc/intel/boards/sof_sdw_max98373.c @@ -55,43 +55,68 @@ static int spk_init(struct snd_soc_pcm_runtime *rtd) return ret; } -static int max98373_sdw_trigger(struct snd_pcm_substream *substream, int cmd) +static int mx8373_enable_spk_pin(struct snd_pcm_substream *substream, bool enable) { + struct snd_soc_pcm_runtime *rtd = asoc_substream_to_rtd(substream); + struct snd_soc_dai *codec_dai; + struct snd_soc_dai *cpu_dai; int ret; + int j; - switch (cmd) { - case SNDRV_PCM_TRIGGER_START: - case SNDRV_PCM_TRIGGER_RESUME: - case SNDRV_PCM_TRIGGER_PAUSE_RELEASE: - /* enable max98373 first */ - ret = max_98373_trigger(substream, cmd); - if (ret < 0) - break; - - ret = sdw_trigger(substream, cmd); - break; - case SNDRV_PCM_TRIGGER_STOP: - case SNDRV_PCM_TRIGGER_SUSPEND: - case SNDRV_PCM_TRIGGER_PAUSE_PUSH: - ret = sdw_trigger(substream, cmd); - if (ret < 0) - break; - - ret = max_98373_trigger(substream, cmd); - break; - default: - ret = -EINVAL; - break; + /* set spk pin by playback only */ + if (substream->stream == SNDRV_PCM_STREAM_CAPTURE) + return 0; + + cpu_dai = asoc_rtd_to_cpu(rtd, 0); + for_each_rtd_codec_dais(rtd, j, codec_dai) { + struct snd_soc_dapm_context *dapm = + snd_soc_component_get_dapm(cpu_dai->component); + char pin_name[16]; + + snprintf(pin_name, ARRAY_SIZE(pin_name), "%s Spk", + codec_dai->component->name_prefix); + + if (enable) + ret = snd_soc_dapm_enable_pin(dapm, pin_name); + else + ret = snd_soc_dapm_disable_pin(dapm, pin_name); + + if (!ret) + snd_soc_dapm_sync(dapm); } - return ret; + return 0; +} + +static int mx8373_sdw_prepare(struct snd_pcm_substream *substream) +{ + int ret = 0; + + /* according to soc_pcm_prepare dai link prepare is called first */ + ret = sdw_prepare(substream); + if (ret < 0) + return ret; + + return mx8373_enable_spk_pin(substream, true); +} + +static int mx8373_sdw_hw_free(struct snd_pcm_substream *substream) +{ + int ret = 0; + + /* according to soc_pcm_hw_free dai link free is called first */ + ret = sdw_hw_free(substream); + if (ret < 0) + return ret; + + return mx8373_enable_spk_pin(substream, false); } static const struct snd_soc_ops max_98373_sdw_ops = { .startup = sdw_startup, - .prepare = sdw_prepare, - .trigger = max98373_sdw_trigger, - .hw_free = sdw_hw_free, + .prepare = mx8373_sdw_prepare, + .trigger = sdw_trigger, + .hw_free = mx8373_sdw_hw_free, .shutdown = sdw_shutdown, }; From 0c4f8fd3ed9cb27228497f0ae495ea6cef7017b1 Mon Sep 17 00:00:00 2001 From: Peter Robinson Date: Sun, 27 Jun 2021 11:59:55 +0100 Subject: [PATCH 009/714] ASoC: remove zte zx dangling kconfig In commit dc98f1d we removed the zte zx sound drivers but there was a dangling Kconfig left around for the codec so fix this. Fixes: dc98f1d655ca ("ASoC: remove zte zx drivers") Signed-off-by: Peter Robinson Cc: Arnd Bergmann Cc: Mark Brown Acked-by: Arnd Bergmann Link: https://lore.kernel.org/r/20210627105955.3410015-1-pbrobinson@gmail.com Signed-off-by: Mark Brown --- sound/soc/codecs/Kconfig | 5 ----- 1 file changed, 5 deletions(-) diff --git a/sound/soc/codecs/Kconfig b/sound/soc/codecs/Kconfig index 3abdda48dc8eb2..bea7b47eddbece 100644 --- a/sound/soc/codecs/Kconfig +++ b/sound/soc/codecs/Kconfig @@ -1813,11 +1813,6 @@ config SND_SOC_ZL38060 which consists of a Digital Signal Processor (DSP), several Digital Audio Interfaces (DAIs), analog outputs, and a block of 14 GPIOs. -config SND_SOC_ZX_AUD96P22 - tristate "ZTE ZX AUD96P22 CODEC" - depends on I2C - select REGMAP_I2C - # Amp config SND_SOC_LM4857 tristate From dd6fb8ff2210f74b056bf9234d0605e8c26a8ac0 Mon Sep 17 00:00:00 2001 From: Charles Keepax Date: Sat, 26 Jun 2021 16:59:39 +0100 Subject: [PATCH 010/714] ASoC: wm_adsp: Correct wm_coeff_tlv_get handling When wm_coeff_tlv_get was updated it was accidentally switch to the _raw version of the helper causing it to ignore the current DSP state it should be checking. Switch the code back to the correct helper so that users can't read the controls when they arn't available. Fixes: 73ecf1a673d3 ("ASoC: wm_adsp: Correct cache handling of new kernel control API") Signed-off-by: Charles Keepax Link: https://lore.kernel.org/r/20210626155941.12251-1-ckeepax@opensource.cirrus.com Signed-off-by: Mark Brown --- sound/soc/codecs/wm_adsp.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sound/soc/codecs/wm_adsp.c b/sound/soc/codecs/wm_adsp.c index 37aa020f23f631..59d876d36cfd8d 100644 --- a/sound/soc/codecs/wm_adsp.c +++ b/sound/soc/codecs/wm_adsp.c @@ -1213,7 +1213,7 @@ static int wm_coeff_tlv_get(struct snd_kcontrol *kctl, mutex_lock(&ctl->dsp->pwr_lock); - ret = wm_coeff_read_ctrl_raw(ctl, ctl->cache, size); + ret = wm_coeff_read_ctrl(ctl, ctl->cache, size); if (!ret && copy_to_user(bytes, ctl->cache, size)) ret = -EFAULT; From e588332271b9cde6252dac8973b77e580cd639bd Mon Sep 17 00:00:00 2001 From: Charles Keepax Date: Sat, 26 Jun 2021 16:59:40 +0100 Subject: [PATCH 011/714] ASoC: wm_adsp: Add CCM_CORE_RESET to Halo start core When starting the Halo core it is advised to also write the core reset bit, this ensures the part starts up in the appropriate state. Omitting this doesn't cause issues on most parts but cs40l25 requires it and it is advised on all Halo parts. Signed-off-by: Charles Keepax Link: https://lore.kernel.org/r/20210626155941.12251-2-ckeepax@opensource.cirrus.com Signed-off-by: Mark Brown --- sound/soc/codecs/wm_adsp.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/sound/soc/codecs/wm_adsp.c b/sound/soc/codecs/wm_adsp.c index 59d876d36cfd8d..549d98241daec1 100644 --- a/sound/soc/codecs/wm_adsp.c +++ b/sound/soc/codecs/wm_adsp.c @@ -282,6 +282,7 @@ /* * HALO_CCM_CORE_CONTROL */ +#define HALO_CORE_RESET 0x00000200 #define HALO_CORE_EN 0x00000001 /* @@ -3333,7 +3334,8 @@ static int wm_halo_start_core(struct wm_adsp *dsp) { return regmap_update_bits(dsp->regmap, dsp->base + HALO_CCM_CORE_CONTROL, - HALO_CORE_EN, HALO_CORE_EN); + HALO_CORE_RESET | HALO_CORE_EN, + HALO_CORE_RESET | HALO_CORE_EN); } static void wm_halo_stop_core(struct wm_adsp *dsp) From 2b6a761be079f9fa8abf3157b5679a6f38885db4 Mon Sep 17 00:00:00 2001 From: ChiYuan Huang Date: Sat, 26 Jun 2021 23:58:32 +0800 Subject: [PATCH 012/714] regulator: rtmv20: Fix wrong mask for strobe-polarity-high Fix wrong mask for strobe-polarity-high. Signed-off-by: ChiYuan Huang In-reply-to: Reviewed-by: Axel Lin Link: https://lore.kernel.org/r/1624723112-26653-1-git-send-email-u0084500@gmail.com Signed-off-by: Mark Brown --- drivers/regulator/rtmv20-regulator.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/regulator/rtmv20-regulator.c b/drivers/regulator/rtmv20-regulator.c index 852fb2596ffdad..d7372599385ef8 100644 --- a/drivers/regulator/rtmv20-regulator.c +++ b/drivers/regulator/rtmv20-regulator.c @@ -36,7 +36,7 @@ #define RTMV20_WIDTH2_MASK GENMASK(7, 0) #define RTMV20_LBPLVL_MASK GENMASK(3, 0) #define RTMV20_LBPEN_MASK BIT(7) -#define RTMV20_STROBEPOL_MASK BIT(1) +#define RTMV20_STROBEPOL_MASK BIT(0) #define RTMV20_VSYNPOL_MASK BIT(1) #define RTMV20_FSINEN_MASK BIT(7) #define RTMV20_ESEN_MASK BIT(6) From 6549c46af8551b346bcc0b9043f93848319acd5c Mon Sep 17 00:00:00 2001 From: Axel Lin Date: Sun, 27 Jun 2021 16:04:18 +0800 Subject: [PATCH 013/714] regulator: rt5033: Fix n_voltages settings for BUCK and LDO For linear regulators, the n_voltages should be (max - min) / step + 1. Buck voltage from 1v to 3V, per step 100mV, and vout mask is 0x1f. If value is from 20 to 31, the voltage will all be fixed to 3V. And LDO also, just vout range is different from 1.2v to 3v, step is the same. If value is from 18 to 31, the voltage will also be fixed to 3v. Signed-off-by: Axel Lin Reviewed-by: ChiYuan Huang Link: https://lore.kernel.org/r/20210627080418.1718127-1-axel.lin@ingics.com Signed-off-by: Mark Brown --- include/linux/mfd/rt5033-private.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/include/linux/mfd/rt5033-private.h b/include/linux/mfd/rt5033-private.h index 2d1895c3efbf2e..40a0c2dfb80ff0 100644 --- a/include/linux/mfd/rt5033-private.h +++ b/include/linux/mfd/rt5033-private.h @@ -200,13 +200,13 @@ enum rt5033_reg { #define RT5033_REGULATOR_BUCK_VOLTAGE_MIN 1000000U #define RT5033_REGULATOR_BUCK_VOLTAGE_MAX 3000000U #define RT5033_REGULATOR_BUCK_VOLTAGE_STEP 100000U -#define RT5033_REGULATOR_BUCK_VOLTAGE_STEP_NUM 32 +#define RT5033_REGULATOR_BUCK_VOLTAGE_STEP_NUM 21 /* RT5033 regulator LDO output voltage uV */ #define RT5033_REGULATOR_LDO_VOLTAGE_MIN 1200000U #define RT5033_REGULATOR_LDO_VOLTAGE_MAX 3000000U #define RT5033_REGULATOR_LDO_VOLTAGE_STEP 100000U -#define RT5033_REGULATOR_LDO_VOLTAGE_STEP_NUM 32 +#define RT5033_REGULATOR_LDO_VOLTAGE_STEP_NUM 19 /* RT5033 regulator SAFE LDO output voltage uV */ #define RT5033_REGULATOR_SAFE_LDO_VOLTAGE 4900000U From 1988e0d84161dabd99d1c27033fbd6ee439bf432 Mon Sep 17 00:00:00 2001 From: Linus Walleij Date: Fri, 4 Jun 2021 01:18:30 +0200 Subject: [PATCH 014/714] drm/panel: nt35510: Do not fail if DSI read fails Failing to read the MTP over DSI should not bring down the system and make us bail out from using the display, it turns out that this happens when toggling the display off and on, and that write is often still working so the display output is just fine. Printing an error is enough. Tested by killing the Gnome session repeatedly on the Samsung Skomer. Fixes: 899f24ed8d3a ("drm/panel: Add driver for Novatek NT35510-based panels") Cc: Stephan Gerhold Reported-by: newbyte@disroot.org Acked-by: Stefan Hansson Signed-off-by: Linus Walleij Link: https://patchwork.freedesktop.org/patch/msgid/20210603231830.3200040-1-linus.walleij@linaro.org --- drivers/gpu/drm/panel/panel-novatek-nt35510.c | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/drivers/gpu/drm/panel/panel-novatek-nt35510.c b/drivers/gpu/drm/panel/panel-novatek-nt35510.c index ef70140c5b09da..873cbd38e6d3ab 100644 --- a/drivers/gpu/drm/panel/panel-novatek-nt35510.c +++ b/drivers/gpu/drm/panel/panel-novatek-nt35510.c @@ -706,9 +706,7 @@ static int nt35510_power_on(struct nt35510 *nt) if (ret) return ret; - ret = nt35510_read_id(nt); - if (ret) - return ret; + nt35510_read_id(nt); /* Set up stuff in manufacturer control, page 1 */ ret = nt35510_send_long(nt, dsi, MCS_CMD_MAUCCTR, From 2c70ff56e49ae219640689a0c86041c0f656046f Mon Sep 17 00:00:00 2001 From: Lucas Stach Date: Mon, 28 Jun 2021 23:04:58 +0200 Subject: [PATCH 015/714] ASoC: codecs: allow SSM2518 to be selected by the user Allow the Analog SSM2518 driver to be enabled without a large bunch of other drivers. Signed-off-by: Lucas Stach Link: https://lore.kernel.org/r/20210628210458.2508973-1-l.stach@pengutronix.de Signed-off-by: Mark Brown --- sound/soc/codecs/Kconfig | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sound/soc/codecs/Kconfig b/sound/soc/codecs/Kconfig index bea7b47eddbece..3a42c461141482 100644 --- a/sound/soc/codecs/Kconfig +++ b/sound/soc/codecs/Kconfig @@ -1325,7 +1325,7 @@ config SND_SOC_SSM2305 high-efficiency mono Class-D audio power amplifiers. config SND_SOC_SSM2518 - tristate + tristate "Analog Devices SSM2518 Class-D Amplifier" depends on I2C config SND_SOC_SSM2602 From 5db5dd5be70eaf808d9fd90174b957fc5c2912cb Mon Sep 17 00:00:00 2001 From: Axel Lin Date: Wed, 30 Jun 2021 15:42:46 +0800 Subject: [PATCH 016/714] regulator: hi6421v600: Fix getting wrong drvdata that causes boot failure Since config.dev = pdev->dev.parent in current code, so dev_get_drvdata(rdev->dev.parent) actually returns the drvdata of the mfd device rather than the regulator. Fix it. Fixes: 9bc146acc331 ("regulator: hi6421v600: Fix setting wrong driver_data") Reported-by: Mauro Carvalho Chehab Signed-off-by: Axel Lin Tested-by: Mauro Carvalho Chehab Reviewed-by: Mauro Carvalho Chehab Link: https://lore.kernel.org/r/20210630074246.2305166-1-axel.lin@ingics.com Signed-off-by: Mark Brown --- drivers/regulator/hi6421v600-regulator.c | 16 +++++++++------- 1 file changed, 9 insertions(+), 7 deletions(-) diff --git a/drivers/regulator/hi6421v600-regulator.c b/drivers/regulator/hi6421v600-regulator.c index 48922704f0e140..a8501e8720d0d4 100644 --- a/drivers/regulator/hi6421v600-regulator.c +++ b/drivers/regulator/hi6421v600-regulator.c @@ -98,10 +98,9 @@ static const unsigned int ldo34_voltages[] = { static int hi6421_spmi_regulator_enable(struct regulator_dev *rdev) { - struct hi6421_spmi_reg_priv *priv; + struct hi6421_spmi_reg_priv *priv = rdev_get_drvdata(rdev); int ret; - priv = dev_get_drvdata(rdev->dev.parent); /* cannot enable more than one regulator at one time */ mutex_lock(&priv->enable_mutex); @@ -119,9 +118,10 @@ static int hi6421_spmi_regulator_enable(struct regulator_dev *rdev) static unsigned int hi6421_spmi_regulator_get_mode(struct regulator_dev *rdev) { - struct hi6421_spmi_reg_info *sreg = rdev_get_drvdata(rdev); + struct hi6421_spmi_reg_info *sreg; unsigned int reg_val; + sreg = container_of(rdev->desc, struct hi6421_spmi_reg_info, desc); regmap_read(rdev->regmap, rdev->desc->enable_reg, ®_val); if (reg_val & sreg->eco_mode_mask) @@ -133,9 +133,10 @@ static unsigned int hi6421_spmi_regulator_get_mode(struct regulator_dev *rdev) static int hi6421_spmi_regulator_set_mode(struct regulator_dev *rdev, unsigned int mode) { - struct hi6421_spmi_reg_info *sreg = rdev_get_drvdata(rdev); + struct hi6421_spmi_reg_info *sreg; unsigned int val; + sreg = container_of(rdev->desc, struct hi6421_spmi_reg_info, desc); switch (mode) { case REGULATOR_MODE_NORMAL: val = 0; @@ -159,7 +160,9 @@ hi6421_spmi_regulator_get_optimum_mode(struct regulator_dev *rdev, int input_uV, int output_uV, int load_uA) { - struct hi6421_spmi_reg_info *sreg = rdev_get_drvdata(rdev); + struct hi6421_spmi_reg_info *sreg; + + sreg = container_of(rdev->desc, struct hi6421_spmi_reg_info, desc); if (!sreg->eco_uA || ((unsigned int)load_uA > sreg->eco_uA)) return REGULATOR_MODE_NORMAL; @@ -252,13 +255,12 @@ static int hi6421_spmi_regulator_probe(struct platform_device *pdev) return -ENOMEM; mutex_init(&priv->enable_mutex); - platform_set_drvdata(pdev, priv); for (i = 0; i < ARRAY_SIZE(regulator_info); i++) { info = ®ulator_info[i]; config.dev = pdev->dev.parent; - config.driver_data = info; + config.driver_data = priv; config.regmap = pmic->regmap; rdev = devm_regulator_register(dev, &info->desc, &config); From e4a5c19888a5f8a9390860ca493e643be58c8791 Mon Sep 17 00:00:00 2001 From: Alain Volmat Date: Wed, 30 Jun 2021 10:45:19 +0200 Subject: [PATCH 017/714] spi: stm32h7: fix full duplex irq handler handling In case of Full-Duplex mode, DXP flag is set when RXP and TXP flags are set. But to avoid 2 different handlings, just add TXP and RXP flag in the mask instead of DXP, and then keep the initial handling of TXP and RXP events. Also rephrase comment about EOTIE which is one of the interrupt enable bits. It is not triggered by any event. Signed-off-by: Amelie Delaunay Signed-off-by: Alain Volmat Reviewed-by: Amelie Delaunay Link: https://lore.kernel.org/r/1625042723-661-3-git-send-email-alain.volmat@foss.st.com Signed-off-by: Mark Brown --- drivers/spi/spi-stm32.c | 15 +++++++++------ 1 file changed, 9 insertions(+), 6 deletions(-) diff --git a/drivers/spi/spi-stm32.c b/drivers/spi/spi-stm32.c index 8ffcffbb81571c..65b37c8dc49f02 100644 --- a/drivers/spi/spi-stm32.c +++ b/drivers/spi/spi-stm32.c @@ -884,15 +884,18 @@ static irqreturn_t stm32h7_spi_irq_thread(int irq, void *dev_id) ier = readl_relaxed(spi->base + STM32H7_SPI_IER); mask = ier; - /* EOTIE is triggered on EOT, SUSP and TXC events. */ + /* + * EOTIE enables irq from EOT, SUSP and TXC events. We need to set + * SUSP to acknowledge it later. TXC is automatically cleared + */ + mask |= STM32H7_SPI_SR_SUSP; /* - * When TXTF is set, DXPIE and TXPIE are cleared. So in case of - * Full-Duplex, need to poll RXP event to know if there are remaining - * data, before disabling SPI. + * DXPIE is set in Full-Duplex, one IT will be raised if TXP and RXP + * are set. So in case of Full-Duplex, need to poll TXP and RXP event. */ - if (spi->rx_buf && !spi->cur_usedma) - mask |= STM32H7_SPI_SR_RXP; + if ((spi->cur_comm == SPI_FULL_DUPLEX) && !spi->cur_usedma) + mask |= STM32H7_SPI_SR_TXP | STM32H7_SPI_SR_RXP; if (!(sr & mask)) { dev_warn(spi->dev, "spurious IT (sr=0x%08x, ier=0x%08x)\n", From 812bae32e5d50914f75a6e036d3bde39ca86b0c3 Mon Sep 17 00:00:00 2001 From: Zev Weiss Date: Fri, 16 Apr 2021 02:51:13 -0500 Subject: [PATCH 018/714] ARM: dts: aspeed: Update e3c246d4i vuart properties This device-tree was merged with a provisional vuart IRQ-polarity property that was still under review and ended up taking a somewhat different form. This patch updates it to match the final form of the new vuart properties, which additionally allow specifying the SIRQ number and LPC address. Signed-off-by: Zev Weiss Reviewed-by: Andrew Jeffery Fixes: ca03042f0f12 ("serial: 8250_aspeed_vuart: add aspeed, lpc-io-reg and aspeed, lpc-interrupts DT properties") Reviewed-by: Joel Stanley Link: https://lore.kernel.org/r/20210416075113.18047-1-zev@bewilderbeest.net Signed-off-by: Joel Stanley --- arch/arm/boot/dts/aspeed-bmc-asrock-e3c246d4i.dts | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/arch/arm/boot/dts/aspeed-bmc-asrock-e3c246d4i.dts b/arch/arm/boot/dts/aspeed-bmc-asrock-e3c246d4i.dts index 33e413ca07e4a2..9b4cf5ebe6d5fc 100644 --- a/arch/arm/boot/dts/aspeed-bmc-asrock-e3c246d4i.dts +++ b/arch/arm/boot/dts/aspeed-bmc-asrock-e3c246d4i.dts @@ -4,6 +4,7 @@ #include "aspeed-g5.dtsi" #include #include +#include /{ model = "ASRock E3C246D4I BMC"; @@ -73,7 +74,8 @@ &vuart { status = "okay"; - aspeed,sirq-active-high; + aspeed,lpc-io-reg = <0x2f8>; + aspeed,lpc-interrupts = <3 IRQ_TYPE_LEVEL_HIGH>; }; &mac0 { From 2d6608b57c50c54c3e46649110e8ea5a40959c30 Mon Sep 17 00:00:00 2001 From: Andrew Jeffery Date: Fri, 25 Jun 2021 15:40:17 +0930 Subject: [PATCH 019/714] ARM: dts: tacoma: Add phase corrections for eMMC The degree values were reversed out from the magic tap values of 7 (in) and 15 + inversion (out) initially suggested by Aspeed. With the patch tacoma survives several gigabytes of reads and writes using dd while without it locks up randomly during the boot process. Signed-off-by: Andrew Jeffery Link: https://lore.kernel.org/r/20210625061017.1149942-1-andrew@aj.id.au Fixes: 2fc88f92359d ("mmc: sdhci-of-aspeed: Expose clock phase controls") Fixes: 961216c135a8 ("ARM: dts: aspeed: Add Rainier system") Signed-off-by: Joel Stanley --- arch/arm/boot/dts/aspeed-bmc-opp-tacoma.dts | 1 + 1 file changed, 1 insertion(+) diff --git a/arch/arm/boot/dts/aspeed-bmc-opp-tacoma.dts b/arch/arm/boot/dts/aspeed-bmc-opp-tacoma.dts index 48038b2ef3df29..e33153dcaea868 100644 --- a/arch/arm/boot/dts/aspeed-bmc-opp-tacoma.dts +++ b/arch/arm/boot/dts/aspeed-bmc-opp-tacoma.dts @@ -186,6 +186,7 @@ &emmc { status = "okay"; + clk-phase-mmc-hs200 = <36>, <270>; }; &fsim0 { From faffd1b2bde3ee428d6891961f6a60f8e08749d6 Mon Sep 17 00:00:00 2001 From: Andrew Jeffery Date: Mon, 28 Jun 2021 11:06:05 +0930 Subject: [PATCH 020/714] ARM: dts: everest: Add phase corrections for eMMC The values were determined experimentally via boot tests, not by measuring the bus behaviour with a scope. We plan to do scope measurements to confirm or refine the values and will update the devicetree if necessary once these have been obtained. However, with the patch we can write and read data without issue, where as booting the system without the patch failed at the point of mounting the rootfs. Signed-off-by: Andrew Jeffery Link: https://lore.kernel.org/r/20210628013605.1257346-1-andrew@aj.id.au Fixes: 2fc88f92359d ("mmc: sdhci-of-aspeed: Expose clock phase controls") Fixes: a5c5168478d7 ("ARM: dts: aspeed: Add Everest BMC machine") Signed-off-by: Joel Stanley --- arch/arm/boot/dts/aspeed-bmc-ibm-everest.dts | 1 + 1 file changed, 1 insertion(+) diff --git a/arch/arm/boot/dts/aspeed-bmc-ibm-everest.dts b/arch/arm/boot/dts/aspeed-bmc-ibm-everest.dts index d26a9e16ff7c30..53c049daf85325 100644 --- a/arch/arm/boot/dts/aspeed-bmc-ibm-everest.dts +++ b/arch/arm/boot/dts/aspeed-bmc-ibm-everest.dts @@ -2832,6 +2832,7 @@ &emmc { status = "okay"; + clk-phase-mmc-hs200 = <180>, <180>; }; &fsim0 { From ab4a49d36010564c70fe5586a4c2b1985866616f Mon Sep 17 00:00:00 2001 From: "B. J. Wyman" Date: Thu, 10 Jun 2021 20:29:41 +0000 Subject: [PATCH 021/714] ARM: dts: aspeed: everest: PSU #3 address change The third power supply had an I2C address conflict with another device in the system. The device will have the address changed from 6Ah to 6Dh. Signed-off-by: B. J. Wyman Reviewed-by: Eddie James Link: https://lore.kernel.org/r/20210610202940.3650554-1-bjwyman@gmail.com Fixes: d66d720b64e5 ("ARM: dts: aspeed: everest: Add power supply i2c devices") Signed-off-by: Joel Stanley --- arch/arm/boot/dts/aspeed-bmc-ibm-everest.dts | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/arch/arm/boot/dts/aspeed-bmc-ibm-everest.dts b/arch/arm/boot/dts/aspeed-bmc-ibm-everest.dts index 53c049daf85325..aa24cac8e5be52 100644 --- a/arch/arm/boot/dts/aspeed-bmc-ibm-everest.dts +++ b/arch/arm/boot/dts/aspeed-bmc-ibm-everest.dts @@ -406,14 +406,14 @@ reg = <0x69>; }; - power-supply@6a { + power-supply@6b { compatible = "ibm,cffps"; - reg = <0x6a>; + reg = <0x6b>; }; - power-supply@6b { + power-supply@6d { compatible = "ibm,cffps"; - reg = <0x6b>; + reg = <0x6d>; }; }; From 0fc4dcc13f090c941abfab453a24945a4005b350 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Toke=20H=C3=B8iland-J=C3=B8rgensen?= Date: Tue, 29 Jun 2021 11:39:07 +0200 Subject: [PATCH 022/714] bpf, devmap: Convert remaining READ_ONCE() to rcu_dereference_check() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit There were a couple of READ_ONCE()-invocations left-over by the devmap RCU conversion. Convert these to rcu_dereference_check() as well to avoid complaints from sparse. Fixes: 782347b6bcad ("xdp: Add proper __rcu annotations to redirect map entries") Reported-by: kernel test robot Signed-off-by: Toke Høiland-Jørgensen Signed-off-by: Daniel Borkmann Reviewed-by: Paul E. McKenney Acked-by: Martin KaFai Lau Link: https://lore.kernel.org/bpf/20210629093907.573598-1-toke@redhat.com --- kernel/bpf/devmap.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/kernel/bpf/devmap.c b/kernel/bpf/devmap.c index 2546dafd6672ab..fdc20892837cd6 100644 --- a/kernel/bpf/devmap.c +++ b/kernel/bpf/devmap.c @@ -558,7 +558,8 @@ int dev_map_enqueue_multi(struct xdp_buff *xdp, struct net_device *dev_rx, if (map->map_type == BPF_MAP_TYPE_DEVMAP) { for (i = 0; i < map->max_entries; i++) { - dst = READ_ONCE(dtab->netdev_map[i]); + dst = rcu_dereference_check(dtab->netdev_map[i], + rcu_read_lock_bh_held()); if (!is_valid_dst(dst, xdp, exclude_ifindex)) continue; @@ -654,7 +655,8 @@ int dev_map_redirect_multi(struct net_device *dev, struct sk_buff *skb, if (map->map_type == BPF_MAP_TYPE_DEVMAP) { for (i = 0; i < map->max_entries; i++) { - dst = READ_ONCE(dtab->netdev_map[i]); + dst = rcu_dereference_check(dtab->netdev_map[i], + rcu_read_lock_bh_held()); if (!dst || dst->dev->ifindex == exclude_ifindex) continue; From 9cf76a72af6ab81030dea6481b1d7bdd814fbdaf Mon Sep 17 00:00:00 2001 From: Kyle Russell Date: Mon, 21 Jun 2021 21:09:41 -0400 Subject: [PATCH 023/714] ASoC: tlv320aic31xx: fix reversed bclk/wclk master bits These are backwards from Table 7-71 of the TLV320AIC3100 spec [1]. This was broken in 12eb4d66ba2e when BCLK_MASTER and WCLK_MASTER were converted from 0x08 and 0x04 to BIT(2) and BIT(3), respectively. -#define AIC31XX_BCLK_MASTER 0x08 -#define AIC31XX_WCLK_MASTER 0x04 +#define AIC31XX_BCLK_MASTER BIT(2) +#define AIC31XX_WCLK_MASTER BIT(3) Probably just a typo since the defines were not listed in bit order. [1] https://www.ti.com/lit/gpn/tlv320aic3100 Signed-off-by: Kyle Russell Link: https://lore.kernel.org/r/20210622010941.241386-1-bkylerussell@gmail.com Signed-off-by: Mark Brown --- sound/soc/codecs/tlv320aic31xx.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/sound/soc/codecs/tlv320aic31xx.h b/sound/soc/codecs/tlv320aic31xx.h index 81952984613d2f..2513922a029231 100644 --- a/sound/soc/codecs/tlv320aic31xx.h +++ b/sound/soc/codecs/tlv320aic31xx.h @@ -151,8 +151,8 @@ struct aic31xx_pdata { #define AIC31XX_WORD_LEN_24BITS 0x02 #define AIC31XX_WORD_LEN_32BITS 0x03 #define AIC31XX_IFACE1_MASTER_MASK GENMASK(3, 2) -#define AIC31XX_BCLK_MASTER BIT(2) -#define AIC31XX_WCLK_MASTER BIT(3) +#define AIC31XX_BCLK_MASTER BIT(3) +#define AIC31XX_WCLK_MASTER BIT(2) /* AIC31XX_DATA_OFFSET */ #define AIC31XX_DATA_OFFSET_MASK GENMASK(7, 0) From 8888ef2304d0ae78f3d5ec19653fa7cc4ffdbd7a Mon Sep 17 00:00:00 2001 From: Axel Lin Date: Wed, 23 Jun 2021 23:34:43 +0800 Subject: [PATCH 024/714] regulator: bd9576: Fix testing wrong flag in check_temp_flag_mismatch Fix trivial copy-paste typo. Signed-off-by: Axel Lin Reviewed-by: Matti Vaittinen Link: https://lore.kernel.org/r/20210623153443.623856-1-axel.lin@ingics.com Signed-off-by: Mark Brown --- drivers/regulator/bd9576-regulator.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/regulator/bd9576-regulator.c b/drivers/regulator/bd9576-regulator.c index e16c3727db7ad9..aa42da4d141edd 100644 --- a/drivers/regulator/bd9576-regulator.c +++ b/drivers/regulator/bd9576-regulator.c @@ -294,9 +294,9 @@ static bool check_temp_flag_mismatch(struct regulator_dev *rdev, int severity, struct bd957x_regulator_data *r) { if ((severity == REGULATOR_SEVERITY_ERR && - r->ovd_notif != REGULATOR_EVENT_OVER_TEMP) || + r->temp_notif != REGULATOR_EVENT_OVER_TEMP) || (severity == REGULATOR_SEVERITY_WARN && - r->ovd_notif != REGULATOR_EVENT_OVER_TEMP_WARN)) { + r->temp_notif != REGULATOR_EVENT_OVER_TEMP_WARN)) { dev_warn(rdev_get_dev(rdev), "Can't support both thermal WARN and ERR\n"); if (severity == REGULATOR_SEVERITY_WARN) From c36748ac545421d94a5091c754414c0f3664bf10 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?J=C3=A9r=C3=B4me=20Glisse?= Date: Thu, 1 Jul 2021 08:28:25 -0700 Subject: [PATCH 025/714] misc: eeprom: at24: Always append device id even if label property is set. MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit We need to append device id even if eeprom have a label property set as some platform can have multiple eeproms with same label and we can not register each of those with same label. Failing to register those eeproms trigger cascade failures on such platform (system is no longer working). This fix regression on such platform introduced with 4e302c3b568e Reported-by: Alexander Fomichev Fixes: 4e302c3b568e ("misc: eeprom: at24: fix NVMEM name with custom AT24 device name") Cc: stable@vger.kernel.org Signed-off-by: Jérôme Glisse Signed-off-by: Bartosz Golaszewski --- drivers/misc/eeprom/at24.c | 17 +++++++---------- 1 file changed, 7 insertions(+), 10 deletions(-) diff --git a/drivers/misc/eeprom/at24.c b/drivers/misc/eeprom/at24.c index 7a6f01ace78ace..305ffad131a299 100644 --- a/drivers/misc/eeprom/at24.c +++ b/drivers/misc/eeprom/at24.c @@ -714,23 +714,20 @@ static int at24_probe(struct i2c_client *client) } /* - * If the 'label' property is not present for the AT24 EEPROM, - * then nvmem_config.id is initialised to NVMEM_DEVID_AUTO, - * and this will append the 'devid' to the name of the NVMEM - * device. This is purely legacy and the AT24 driver has always - * defaulted to this. However, if the 'label' property is - * present then this means that the name is specified by the - * firmware and this name should be used verbatim and so it is - * not necessary to append the 'devid'. + * We initialize nvmem_config.id to NVMEM_DEVID_AUTO even if the + * label property is set as some platform can have multiple eeproms + * with same label and we can not register each of those with same + * label. Failing to register those eeproms trigger cascade failure + * on such platform. */ + nvmem_config.id = NVMEM_DEVID_AUTO; + if (device_property_present(dev, "label")) { - nvmem_config.id = NVMEM_DEVID_NONE; err = device_property_read_string(dev, "label", &nvmem_config.name); if (err) return err; } else { - nvmem_config.id = NVMEM_DEVID_AUTO; nvmem_config.name = dev_name(dev); } From 56ea7ed103b46970e171eb1c95916f393d64eeff Mon Sep 17 00:00:00 2001 From: Vinicius Costa Gomes Date: Thu, 13 May 2021 17:31:03 -0700 Subject: [PATCH 026/714] igc: Fix use-after-free error during reset Cleans the next descriptor to watch (next_to_watch) when cleaning the TX ring. Failure to do so can cause invalid memory accesses. If igc_poll() runs while the controller is being reset this can lead to the driver try to free a skb that was already freed. Log message: [ 101.525242] refcount_t: underflow; use-after-free. [ 101.525251] WARNING: CPU: 1 PID: 646 at lib/refcount.c:28 refcount_warn_saturate+0xab/0xf0 [ 101.525259] Modules linked in: sch_etf(E) sch_mqprio(E) rfkill(E) intel_rapl_msr(E) intel_rapl_common(E) x86_pkg_temp_thermal(E) intel_powerclamp(E) coretemp(E) binfmt_misc(E) kvm_intel(E) kvm(E) irqbypass(E) crc32_pclmul(E) ghash_clmulni_intel(E) aesni_intel(E) mei_wdt(E) libaes(E) crypto_simd(E) cryptd(E) glue_helper(E) snd_hda_codec_hdmi(E) rapl(E) intel_cstate(E) snd_hda_intel(E) snd_intel_dspcfg(E) sg(E) soundwire_intel(E) intel_uncore(E) at24(E) soundwire_generic_allocation(E) iTCO_wdt(E) soundwire_cadence(E) intel_pmc_bxt(E) serio_raw(E) snd_hda_codec(E) iTCO_vendor_support(E) watchdog(E) snd_hda_core(E) snd_hwdep(E) snd_soc_core(E) snd_compress(E) snd_pcsp(E) soundwire_bus(E) snd_pcm(E) evdev(E) snd_timer(E) mei_me(E) snd(E) soundcore(E) mei(E) configfs(E) ip_tables(E) x_tables(E) autofs4(E) ext4(E) crc32c_generic(E) crc16(E) mbcache(E) jbd2(E) sd_mod(E) t10_pi(E) crc_t10dif(E) crct10dif_generic(E) i915(E) ahci(E) libahci(E) ehci_pci(E) igb(E) xhci_pci(E) ehci_hcd(E) [ 101.525303] drm_kms_helper(E) dca(E) xhci_hcd(E) libata(E) crct10dif_pclmul(E) cec(E) crct10dif_common(E) tsn(E) igc(E) e1000e(E) ptp(E) i2c_i801(E) crc32c_intel(E) psmouse(E) i2c_algo_bit(E) i2c_smbus(E) scsi_mod(E) lpc_ich(E) pps_core(E) usbcore(E) drm(E) button(E) video(E) [ 101.525318] CPU: 1 PID: 646 Comm: irq/37-enp7s0-T Tainted: G E 5.10.30-rt37-tsn1-rt-ipipe #ipipe [ 101.525320] Hardware name: SIEMENS AG SIMATIC IPC427D/A5E31233588, BIOS V17.02.09 03/31/2017 [ 101.525322] RIP: 0010:refcount_warn_saturate+0xab/0xf0 [ 101.525325] Code: 05 31 48 44 01 01 e8 f0 c6 42 00 0f 0b c3 80 3d 1f 48 44 01 00 75 90 48 c7 c7 78 a8 f3 a6 c6 05 0f 48 44 01 01 e8 d1 c6 42 00 <0f> 0b c3 80 3d fe 47 44 01 00 0f 85 6d ff ff ff 48 c7 c7 d0 a8 f3 [ 101.525327] RSP: 0018:ffffbdedc0917cb8 EFLAGS: 00010286 [ 101.525329] RAX: 0000000000000000 RBX: ffff98fd6becbf40 RCX: 0000000000000001 [ 101.525330] RDX: 0000000000000001 RSI: ffffffffa6f2700c RDI: 00000000ffffffff [ 101.525332] RBP: ffff98fd6becc14c R08: ffffffffa7463d00 R09: ffffbdedc0917c50 [ 101.525333] R10: ffffffffa74c3578 R11: 0000000000000034 R12: 00000000ffffff00 [ 101.525335] R13: ffff98fd6b0b1000 R14: 0000000000000039 R15: ffff98fd6be35c40 [ 101.525337] FS: 0000000000000000(0000) GS:ffff98fd6e240000(0000) knlGS:0000000000000000 [ 101.525339] CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 [ 101.525341] CR2: 00007f34135a3a70 CR3: 0000000150210003 CR4: 00000000001706e0 [ 101.525343] Call Trace: [ 101.525346] sock_wfree+0x9c/0xa0 [ 101.525353] unix_destruct_scm+0x7b/0xa0 [ 101.525358] skb_release_head_state+0x40/0x90 [ 101.525362] skb_release_all+0xe/0x30 [ 101.525364] napi_consume_skb+0x57/0x160 [ 101.525367] igc_poll+0xb7/0xc80 [igc] [ 101.525376] ? sched_clock+0x5/0x10 [ 101.525381] ? sched_clock_cpu+0xe/0x100 [ 101.525385] net_rx_action+0x14c/0x410 [ 101.525388] __do_softirq+0xe9/0x2f4 [ 101.525391] __local_bh_enable_ip+0xe3/0x110 [ 101.525395] ? irq_finalize_oneshot.part.47+0xe0/0xe0 [ 101.525398] irq_forced_thread_fn+0x6a/0x80 [ 101.525401] irq_thread+0xe8/0x180 [ 101.525403] ? wake_threads_waitq+0x30/0x30 [ 101.525406] ? irq_thread_check_affinity+0xd0/0xd0 [ 101.525408] kthread+0x183/0x1a0 [ 101.525412] ? kthread_park+0x80/0x80 [ 101.525415] ret_from_fork+0x22/0x30 Fixes: 13b5b7fd6a4a ("igc: Add support for Tx/Rx rings") Reported-by: Erez Geva Signed-off-by: Vinicius Costa Gomes Tested-by: Dvora Fuxbrumer Signed-off-by: Tony Nguyen --- drivers/net/ethernet/intel/igc/igc_main.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/net/ethernet/intel/igc/igc_main.c b/drivers/net/ethernet/intel/igc/igc_main.c index 95323095094dd7..1d00a63eb935cc 100644 --- a/drivers/net/ethernet/intel/igc/igc_main.c +++ b/drivers/net/ethernet/intel/igc/igc_main.c @@ -232,6 +232,8 @@ static void igc_clean_tx_ring(struct igc_ring *tx_ring) igc_unmap_tx_buffer(tx_ring->dev, tx_buffer); } + tx_buffer->next_to_watch = NULL; + /* move us one more past the eop_desc for start of next pkt */ tx_buffer++; i++; From 7b292608db23ccbbfbfa50cdb155d01725d7a52e Mon Sep 17 00:00:00 2001 From: Vinicius Costa Gomes Date: Thu, 13 May 2021 17:31:04 -0700 Subject: [PATCH 027/714] igb: Fix use-after-free error during reset Cleans the next descriptor to watch (next_to_watch) when cleaning the TX ring. Failure to do so can cause invalid memory accesses. If igb_poll() runs while the controller is reset this can lead to the driver try to free a skb that was already freed. (The crash is harder to reproduce with the igb driver, but the same potential problem exists as the code is identical to igc) Fixes: 7cc6fd4c60f2 ("igb: Don't bother clearing Tx buffer_info in igb_clean_tx_ring") Signed-off-by: Vinicius Costa Gomes Reported-by: Erez Geva Tested-by: Tony Brelinski Signed-off-by: Tony Nguyen --- drivers/net/ethernet/intel/igb/igb_main.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/net/ethernet/intel/igb/igb_main.c b/drivers/net/ethernet/intel/igb/igb_main.c index 7e6435dc7e80cf..a61e2e5e95c008 100644 --- a/drivers/net/ethernet/intel/igb/igb_main.c +++ b/drivers/net/ethernet/intel/igb/igb_main.c @@ -4835,6 +4835,8 @@ static void igb_clean_tx_ring(struct igb_ring *tx_ring) DMA_TO_DEVICE); } + tx_buffer->next_to_watch = NULL; + /* move us one more past the eop_desc for start of next pkt */ tx_buffer++; i++; From 05682a0a61b6cbecd97a0f37f743b2cbfd516977 Mon Sep 17 00:00:00 2001 From: Tom Rix Date: Fri, 21 May 2021 12:50:19 -0700 Subject: [PATCH 028/714] igc: change default return of igc_read_phy_reg() Static analysis reports this problem igc_main.c:4944:20: warning: The left operand of '&' is a garbage value if (!(phy_data & SR_1000T_REMOTE_RX_STATUS) && ~~~~~~~~ ^ phy_data is set by the call to igc_read_phy_reg() only if there is a read_reg() op, else it is unset and a 0 is returned. Change the return to -EOPNOTSUPP. Fixes: 208983f099d9 ("igc: Add watchdog") Signed-off-by: Tom Rix Tested-by: Dvora Fuxbrumer Signed-off-by: Tony Nguyen --- drivers/net/ethernet/intel/igc/igc.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/ethernet/intel/igc/igc.h b/drivers/net/ethernet/intel/igc/igc.h index 9e0bbb2e55e313..5901ed9fb545e8 100644 --- a/drivers/net/ethernet/intel/igc/igc.h +++ b/drivers/net/ethernet/intel/igc/igc.h @@ -578,7 +578,7 @@ static inline s32 igc_read_phy_reg(struct igc_hw *hw, u32 offset, u16 *data) if (hw->phy.ops.read_reg) return hw->phy.ops.read_reg(hw, offset, data); - return 0; + return -EOPNOTSUPP; } void igc_reinit_locked(struct igc_adapter *); From dd2aefcd5e37989ae5f90afdae44bbbf3a2990da Mon Sep 17 00:00:00 2001 From: Christophe JAILLET Date: Sat, 12 Jun 2021 15:46:09 +0200 Subject: [PATCH 029/714] ixgbe: Fix an error handling path in 'ixgbe_probe()' If an error occurs after a 'pci_enable_pcie_error_reporting()' call, it must be undone by a corresponding 'pci_disable_pcie_error_reporting()' call, as already done in the remove function. Fixes: 6fabd715e6d8 ("ixgbe: Implement PCIe AER support") Signed-off-by: Christophe JAILLET Tested-by: Tony Brelinski Signed-off-by: Tony Nguyen --- drivers/net/ethernet/intel/ixgbe/ixgbe_main.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c b/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c index ffff69efd78a68..913253f8ecb4ec 100644 --- a/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c +++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c @@ -11067,6 +11067,7 @@ static int ixgbe_probe(struct pci_dev *pdev, const struct pci_device_id *ent) disable_dev = !test_and_set_bit(__IXGBE_DISABLED, &adapter->state); free_netdev(netdev); err_alloc_etherdev: + pci_disable_pcie_error_reporting(pdev); pci_release_mem_regions(pdev); err_pci_reg: err_dma: From c6bc9e5ce5d37cb3e6b552f41b92a193db1806ab Mon Sep 17 00:00:00 2001 From: Christophe JAILLET Date: Sat, 12 Jun 2021 22:00:05 +0200 Subject: [PATCH 030/714] igc: Fix an error handling path in 'igc_probe()' If an error occurs after a 'pci_enable_pcie_error_reporting()' call, it must be undone by a corresponding 'pci_disable_pcie_error_reporting()' call, as already done in the remove function. Fixes: c9a11c23ceb6 ("igc: Add netdev") Signed-off-by: Christophe JAILLET Tested-by: Dvora Fuxbrumer Acked-by: Sasha Neftin Signed-off-by: Tony Nguyen --- drivers/net/ethernet/intel/igc/igc_main.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/net/ethernet/intel/igc/igc_main.c b/drivers/net/ethernet/intel/igc/igc_main.c index 1d00a63eb935cc..e29aadbc674418 100644 --- a/drivers/net/ethernet/intel/igc/igc_main.c +++ b/drivers/net/ethernet/intel/igc/igc_main.c @@ -6056,6 +6056,7 @@ static int igc_probe(struct pci_dev *pdev, err_ioremap: free_netdev(netdev); err_alloc_etherdev: + pci_disable_pcie_error_reporting(pdev); pci_release_mem_regions(pdev); err_pci_reg: err_dma: From fea03b1cebd653cd095f2e9a58cfe1c85661c363 Mon Sep 17 00:00:00 2001 From: Christophe JAILLET Date: Sat, 12 Jun 2021 22:08:33 +0200 Subject: [PATCH 031/714] igb: Fix an error handling path in 'igb_probe()' If an error occurs after a 'pci_enable_pcie_error_reporting()' call, it must be undone by a corresponding 'pci_disable_pcie_error_reporting()' call, as already done in the remove function. Fixes: 40a914fa72ab ("igb: Add support for pci-e Advanced Error Reporting") Signed-off-by: Christophe JAILLET Tested-by: Tony Brelinski Signed-off-by: Tony Nguyen --- drivers/net/ethernet/intel/igb/igb_main.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/net/ethernet/intel/igb/igb_main.c b/drivers/net/ethernet/intel/igb/igb_main.c index a61e2e5e95c008..abc239b736fb4d 100644 --- a/drivers/net/ethernet/intel/igb/igb_main.c +++ b/drivers/net/ethernet/intel/igb/igb_main.c @@ -3615,6 +3615,7 @@ static int igb_probe(struct pci_dev *pdev, const struct pci_device_id *ent) err_ioremap: free_netdev(netdev); err_alloc_etherdev: + pci_disable_pcie_error_reporting(pdev); pci_release_mem_regions(pdev); err_pci_reg: err_dma: From e85e14d68f517ef12a5fb8123fff65526b35b6cd Mon Sep 17 00:00:00 2001 From: Christophe JAILLET Date: Wed, 16 Jun 2021 07:00:36 +0200 Subject: [PATCH 032/714] fm10k: Fix an error handling path in 'fm10k_probe()' If an error occurs after a 'pci_enable_pcie_error_reporting()' call, it must be undone by a corresponding 'pci_disable_pcie_error_reporting()' call, as already done in the remove function. Fixes: 19ae1b3fb99c ("fm10k: Add support for PCI power management and error handling") Signed-off-by: Christophe JAILLET Signed-off-by: Tony Nguyen --- drivers/net/ethernet/intel/fm10k/fm10k_pci.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/net/ethernet/intel/fm10k/fm10k_pci.c b/drivers/net/ethernet/intel/fm10k/fm10k_pci.c index dbcae92bb18da1..adfa2768f024dc 100644 --- a/drivers/net/ethernet/intel/fm10k/fm10k_pci.c +++ b/drivers/net/ethernet/intel/fm10k/fm10k_pci.c @@ -2227,6 +2227,7 @@ static int fm10k_probe(struct pci_dev *pdev, const struct pci_device_id *ent) err_ioremap: free_netdev(netdev); err_alloc_netdev: + pci_disable_pcie_error_reporting(pdev); pci_release_mem_regions(pdev); err_pci_reg: err_dma: From 4589075608420bc49fcef6e98279324bf2bb91ae Mon Sep 17 00:00:00 2001 From: Christophe JAILLET Date: Wed, 16 Jun 2021 07:05:53 +0200 Subject: [PATCH 033/714] e1000e: Fix an error handling path in 'e1000_probe()' If an error occurs after a 'pci_enable_pcie_error_reporting()' call, it must be undone by a corresponding 'pci_disable_pcie_error_reporting()' call, as already done in the remove function. Fixes: 111b9dc5c981 ("e1000e: add aer support") Signed-off-by: Christophe JAILLET Acked-by: Sasha Neftin Tested-by: Dvora Fuxbrumer Signed-off-by: Tony Nguyen --- drivers/net/ethernet/intel/e1000e/netdev.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/net/ethernet/intel/e1000e/netdev.c b/drivers/net/ethernet/intel/e1000e/netdev.c index d150dade06cf1e..757a54c39eefd8 100644 --- a/drivers/net/ethernet/intel/e1000e/netdev.c +++ b/drivers/net/ethernet/intel/e1000e/netdev.c @@ -7664,6 +7664,7 @@ static int e1000_probe(struct pci_dev *pdev, const struct pci_device_id *ent) err_ioremap: free_netdev(netdev); err_alloc_etherdev: + pci_disable_pcie_error_reporting(pdev); pci_release_mem_regions(pdev); err_pci_reg: err_dma: From af30cbd2f4d6d66a9b6094e0aa32420bc8b20e08 Mon Sep 17 00:00:00 2001 From: Christophe JAILLET Date: Wed, 16 Jun 2021 07:53:02 +0200 Subject: [PATCH 034/714] iavf: Fix an error handling path in 'iavf_probe()' If an error occurs after a 'pci_enable_pcie_error_reporting()' call, it must be undone by a corresponding 'pci_disable_pcie_error_reporting()' call, as already done in the remove function. Fixes: 5eae00c57f5e ("i40evf: main driver core") Signed-off-by: Christophe JAILLET Signed-off-by: Tony Nguyen --- drivers/net/ethernet/intel/iavf/iavf_main.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/net/ethernet/intel/iavf/iavf_main.c b/drivers/net/ethernet/intel/iavf/iavf_main.c index e612c24fa38429..44bafedd09f286 100644 --- a/drivers/net/ethernet/intel/iavf/iavf_main.c +++ b/drivers/net/ethernet/intel/iavf/iavf_main.c @@ -3798,6 +3798,7 @@ static int iavf_probe(struct pci_dev *pdev, const struct pci_device_id *ent) err_ioremap: free_netdev(netdev); err_alloc_etherdev: + pci_disable_pcie_error_reporting(pdev); pci_release_regions(pdev); err_pci_reg: err_dma: From 6c19d772618fea40d9681f259368f284a330fd90 Mon Sep 17 00:00:00 2001 From: Aleksandr Loktionov Date: Thu, 22 Apr 2021 10:19:23 +0000 Subject: [PATCH 035/714] igb: Check if num of q_vectors is smaller than max before array access Ensure that the adapter->q_vector[MAX_Q_VECTORS] array isn't accessed beyond its size. It was fixed by using a local variable num_q_vectors as a limit for loop index, and ensure that num_q_vectors is not bigger than MAX_Q_VECTORS. Fixes: 047e0030f1e6 ("igb: add new data structure for handling interrupts and NAPI") Signed-off-by: Aleksandr Loktionov Reviewed-by: Grzegorz Siwik Reviewed-by: Arkadiusz Kubalewski Reviewed-by: Slawomir Laba Reviewed-by: Sylwester Dziedziuch Reviewed-by: Mateusz Palczewski Tested-by: Tony Brelinski Signed-off-by: Tony Nguyen --- drivers/net/ethernet/intel/igb/igb_main.c | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) diff --git a/drivers/net/ethernet/intel/igb/igb_main.c b/drivers/net/ethernet/intel/igb/igb_main.c index abc239b736fb4d..9470ba8914837c 100644 --- a/drivers/net/ethernet/intel/igb/igb_main.c +++ b/drivers/net/ethernet/intel/igb/igb_main.c @@ -931,6 +931,7 @@ static void igb_configure_msix(struct igb_adapter *adapter) **/ static int igb_request_msix(struct igb_adapter *adapter) { + unsigned int num_q_vectors = adapter->num_q_vectors; struct net_device *netdev = adapter->netdev; int i, err = 0, vector = 0, free_vector = 0; @@ -939,7 +940,13 @@ static int igb_request_msix(struct igb_adapter *adapter) if (err) goto err_out; - for (i = 0; i < adapter->num_q_vectors; i++) { + if (num_q_vectors > MAX_Q_VECTORS) { + num_q_vectors = MAX_Q_VECTORS; + dev_warn(&adapter->pdev->dev, + "The number of queue vectors (%d) is higher than max allowed (%d)\n", + adapter->num_q_vectors, MAX_Q_VECTORS); + } + for (i = 0; i < num_q_vectors; i++) { struct igb_q_vector *q_vector = adapter->q_vector[i]; vector++; From 382a7c20d9253bcd5715789b8179528d0f3de72c Mon Sep 17 00:00:00 2001 From: Jedrzej Jagielski Date: Fri, 11 Jun 2021 22:42:17 +0000 Subject: [PATCH 036/714] igb: Fix position of assignment to *ring Assignment to *ring should be done after correctness check of the argument queue. Fixes: 91db364236c8 ("igb: Refactor igb_configure_cbs()") Signed-off-by: Jedrzej Jagielski Acked-by: Vinicius Costa Gomes Tested-by: Tony Brelinski Signed-off-by: Tony Nguyen --- drivers/net/ethernet/intel/igb/igb_main.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/net/ethernet/intel/igb/igb_main.c b/drivers/net/ethernet/intel/igb/igb_main.c index 9470ba8914837c..171a7a629b203b 100644 --- a/drivers/net/ethernet/intel/igb/igb_main.c +++ b/drivers/net/ethernet/intel/igb/igb_main.c @@ -1685,14 +1685,15 @@ static bool is_any_txtime_enabled(struct igb_adapter *adapter) **/ static void igb_config_tx_modes(struct igb_adapter *adapter, int queue) { - struct igb_ring *ring = adapter->tx_ring[queue]; struct net_device *netdev = adapter->netdev; struct e1000_hw *hw = &adapter->hw; + struct igb_ring *ring; u32 tqavcc, tqavctrl; u16 value; WARN_ON(hw->mac.type != e1000_i210); WARN_ON(queue < 0 || queue > 1); + ring = adapter->tx_ring[queue]; /* If any of the Qav features is enabled, configure queues as SR and * with HIGH PRIO. If none is, then configure them with LOW PRIO and From a3609ac24c18947737f5bc1746b8735814c521d1 Mon Sep 17 00:00:00 2001 From: Oleksij Rempel Date: Tue, 29 Jun 2021 06:43:05 +0200 Subject: [PATCH 037/714] net: usb: asix: ax88772: suspend PHY on driver probe After probe/bind sequence is the PHY in active state, even if interface is stopped. As result, on some systems like Samsung Exynos5250 SoC based Arndale board, the ASIX PHY will be able to negotiate the link but fail to transmit the data. To handle it, suspend the PHY on probe. Fixes: e532a096be0e ("net: usb: asix: ax88772: add phylib support") Signed-off-by: Oleksij Rempel Reported-by: Marek Szyprowski Tested-by: Marek Szyprowski Signed-off-by: David S. Miller --- drivers/net/usb/asix_devices.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/net/usb/asix_devices.c b/drivers/net/usb/asix_devices.c index aec97b021a7353..2c115216420a43 100644 --- a/drivers/net/usb/asix_devices.c +++ b/drivers/net/usb/asix_devices.c @@ -701,6 +701,7 @@ static int ax88772_init_phy(struct usbnet *dev) return ret; } + phy_suspend(priv->phydev); priv->phydev->mac_managed_pm = 1; phy_attached_info(priv->phydev); From b18114476a1432ad1db5d5605bc8cd131814d264 Mon Sep 17 00:00:00 2001 From: Baowen Zheng Date: Tue, 29 Jun 2021 09:22:11 +0200 Subject: [PATCH 038/714] openvswitch: Optimize operation for key comparison In the current implement when comparing two flow keys, we will return result after comparing the whole key from start to end. In our optimization, we will return result in the first none-zero comparison, then we will improve the flow table looking up efficiency. Signed-off-by: Baowen Zheng Signed-off-by: Louis Peens Signed-off-by: Simon Horman Signed-off-by: David S. Miller --- net/openvswitch/flow_table.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/net/openvswitch/flow_table.c b/net/openvswitch/flow_table.c index c89c8da99f1a2e..d4a2db0b229989 100644 --- a/net/openvswitch/flow_table.c +++ b/net/openvswitch/flow_table.c @@ -670,13 +670,13 @@ static bool cmp_key(const struct sw_flow_key *key1, { const long *cp1 = (const long *)((const u8 *)key1 + key_start); const long *cp2 = (const long *)((const u8 *)key2 + key_start); - long diffs = 0; int i; for (i = key_start; i < key_end; i += sizeof(long)) - diffs |= *cp1++ ^ *cp2++; + if (*cp1++ ^ *cp2++) + return false; - return diffs == 0; + return true; } static bool flow_cmp_masked_key(const struct sw_flow *flow, From 996af62167d0e0ec69b938a3561e96f84ffff1aa Mon Sep 17 00:00:00 2001 From: Yang Yingliang Date: Tue, 29 Jun 2021 15:22:37 +0800 Subject: [PATCH 039/714] net/802/mrp: fix memleak in mrp_request_join() I got kmemleak report when doing fuzz test: BUG: memory leak unreferenced object 0xffff88810c239500 (size 64): comm "syz-executor940", pid 882, jiffies 4294712870 (age 14.631s) hex dump (first 32 bytes): 01 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 ................ 00 00 00 00 00 00 00 00 01 00 00 00 01 02 00 04 ................ backtrace: [<00000000a323afa4>] slab_alloc_node mm/slub.c:2972 [inline] [<00000000a323afa4>] slab_alloc mm/slub.c:2980 [inline] [<00000000a323afa4>] __kmalloc+0x167/0x340 mm/slub.c:4130 [<000000005034ca11>] kmalloc include/linux/slab.h:595 [inline] [<000000005034ca11>] mrp_attr_create net/802/mrp.c:276 [inline] [<000000005034ca11>] mrp_request_join+0x265/0x550 net/802/mrp.c:530 [<00000000fcfd81f3>] vlan_mvrp_request_join+0x145/0x170 net/8021q/vlan_mvrp.c:40 [<000000009258546e>] vlan_dev_open+0x477/0x890 net/8021q/vlan_dev.c:292 [<0000000059acd82b>] __dev_open+0x281/0x410 net/core/dev.c:1609 [<000000004e6dc695>] __dev_change_flags+0x424/0x560 net/core/dev.c:8767 [<00000000471a09af>] rtnl_configure_link+0xd9/0x210 net/core/rtnetlink.c:3122 [<0000000037a4672b>] __rtnl_newlink+0xe08/0x13e0 net/core/rtnetlink.c:3448 [<000000008d5d0fda>] rtnl_newlink+0x64/0xa0 net/core/rtnetlink.c:3488 [<000000004882fe39>] rtnetlink_rcv_msg+0x369/0xa10 net/core/rtnetlink.c:5552 [<00000000907e6c54>] netlink_rcv_skb+0x134/0x3d0 net/netlink/af_netlink.c:2504 [<00000000e7d7a8c4>] netlink_unicast_kernel net/netlink/af_netlink.c:1314 [inline] [<00000000e7d7a8c4>] netlink_unicast+0x4a0/0x6a0 net/netlink/af_netlink.c:1340 [<00000000e0645d50>] netlink_sendmsg+0x78e/0xc90 net/netlink/af_netlink.c:1929 [<00000000c24559b7>] sock_sendmsg_nosec net/socket.c:654 [inline] [<00000000c24559b7>] sock_sendmsg+0x139/0x170 net/socket.c:674 [<00000000fc210bc2>] ____sys_sendmsg+0x658/0x7d0 net/socket.c:2350 [<00000000be4577b5>] ___sys_sendmsg+0xf8/0x170 net/socket.c:2404 Calling mrp_request_leave() after mrp_request_join(), the attr->state is set to MRP_APPLICANT_VO, mrp_attr_destroy() won't be called in last TX event in mrp_uninit_applicant(), the attr of applicant will be leaked. To fix this leak, iterate and free each attr of applicant before rerturning from mrp_uninit_applicant(). Reported-by: Hulk Robot Signed-off-by: Yang Yingliang Signed-off-by: David S. Miller --- net/802/mrp.c | 14 ++++++++++++++ 1 file changed, 14 insertions(+) diff --git a/net/802/mrp.c b/net/802/mrp.c index bea6e43d45a0dd..35e04cc5390c49 100644 --- a/net/802/mrp.c +++ b/net/802/mrp.c @@ -292,6 +292,19 @@ static void mrp_attr_destroy(struct mrp_applicant *app, struct mrp_attr *attr) kfree(attr); } +static void mrp_attr_destroy_all(struct mrp_applicant *app) +{ + struct rb_node *node, *next; + struct mrp_attr *attr; + + for (node = rb_first(&app->mad); + next = node ? rb_next(node) : NULL, node != NULL; + node = next) { + attr = rb_entry(node, struct mrp_attr, node); + mrp_attr_destroy(app, attr); + } +} + static int mrp_pdu_init(struct mrp_applicant *app) { struct sk_buff *skb; @@ -895,6 +908,7 @@ void mrp_uninit_applicant(struct net_device *dev, struct mrp_application *appl) spin_lock_bh(&app->lock); mrp_mad_event(app, MRP_EVENT_TX); + mrp_attr_destroy_all(app); mrp_pdu_queue(app); spin_unlock_bh(&app->lock); From a34dcbfa1475f18a8f1b1dc3dedb76d746874e61 Mon Sep 17 00:00:00 2001 From: Dan Carpenter Date: Tue, 29 Jun 2021 11:19:44 +0300 Subject: [PATCH 040/714] sctp: prevent info leak in sctp_make_heartbeat() The "hbinfo" struct has a 4 byte hole at the end so we have to zero it out to prevent stack information from being disclosed. Fixes: fe59379b9ab7 ("sctp: do the basic send and recv for PLPMTUD probe") Signed-off-by: Dan Carpenter Signed-off-by: David S. Miller --- net/sctp/sm_make_chunk.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/sctp/sm_make_chunk.c b/net/sctp/sm_make_chunk.c index 6c08e5048d3839..b8fa8f1a727704 100644 --- a/net/sctp/sm_make_chunk.c +++ b/net/sctp/sm_make_chunk.c @@ -1163,7 +1163,7 @@ struct sctp_chunk *sctp_make_heartbeat(const struct sctp_association *asoc, const struct sctp_transport *transport, __u32 probe_size) { - struct sctp_sender_hb_info hbinfo; + struct sctp_sender_hb_info hbinfo = {}; struct sctp_chunk *retval; retval = sctp_make_control(asoc, SCTP_CID_HEARTBEAT, 0, From 42ca63f980842918560b25f0244307fd83b4777c Mon Sep 17 00:00:00 2001 From: Yang Yingliang Date: Tue, 29 Jun 2021 19:53:28 +0800 Subject: [PATCH 041/714] net/802/garp: fix memleak in garp_request_join() I got kmemleak report when doing fuzz test: BUG: memory leak unreferenced object 0xffff88810c909b80 (size 64): comm "syz", pid 957, jiffies 4295220394 (age 399.090s) hex dump (first 32 bytes): 01 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 ................ 00 00 00 00 00 00 00 00 08 00 00 00 01 02 00 04 ................ backtrace: [<00000000ca1f2e2e>] garp_request_join+0x285/0x3d0 [<00000000bf153351>] vlan_gvrp_request_join+0x15b/0x190 [<0000000024005e72>] vlan_dev_open+0x706/0x980 [<00000000dc20c4d4>] __dev_open+0x2bb/0x460 [<0000000066573004>] __dev_change_flags+0x501/0x650 [<0000000035b42f83>] rtnl_configure_link+0xee/0x280 [<00000000a5e69de0>] __rtnl_newlink+0xed5/0x1550 [<00000000a5258f4a>] rtnl_newlink+0x66/0x90 [<00000000506568ee>] rtnetlink_rcv_msg+0x439/0xbd0 [<00000000b7eaeae1>] netlink_rcv_skb+0x14d/0x420 [<00000000c373ce66>] netlink_unicast+0x550/0x750 [<00000000ec74ce74>] netlink_sendmsg+0x88b/0xda0 [<00000000381ff246>] sock_sendmsg+0xc9/0x120 [<000000008f6a2db3>] ____sys_sendmsg+0x6e8/0x820 [<000000008d9c1735>] ___sys_sendmsg+0x145/0x1c0 [<00000000aa39dd8b>] __sys_sendmsg+0xfe/0x1d0 Calling garp_request_leave() after garp_request_join(), the attr->state is set to GARP_APPLICANT_VO, garp_attr_destroy() won't be called in last transmit event in garp_uninit_applicant(), the attr of applicant will be leaked. To fix this leak, iterate and free each attr of applicant before rerturning from garp_uninit_applicant(). Reported-by: Hulk Robot Signed-off-by: Yang Yingliang Signed-off-by: David S. Miller --- net/802/garp.c | 14 ++++++++++++++ 1 file changed, 14 insertions(+) diff --git a/net/802/garp.c b/net/802/garp.c index 400bd857e5f574..f6012f8e59f005 100644 --- a/net/802/garp.c +++ b/net/802/garp.c @@ -203,6 +203,19 @@ static void garp_attr_destroy(struct garp_applicant *app, struct garp_attr *attr kfree(attr); } +static void garp_attr_destroy_all(struct garp_applicant *app) +{ + struct rb_node *node, *next; + struct garp_attr *attr; + + for (node = rb_first(&app->gid); + next = node ? rb_next(node) : NULL, node != NULL; + node = next) { + attr = rb_entry(node, struct garp_attr, node); + garp_attr_destroy(app, attr); + } +} + static int garp_pdu_init(struct garp_applicant *app) { struct sk_buff *skb; @@ -609,6 +622,7 @@ void garp_uninit_applicant(struct net_device *dev, struct garp_application *appl spin_lock_bh(&app->lock); garp_gid_event(app, GARP_EVENT_TRANSMIT_PDU); + garp_attr_destroy_all(app); garp_pdu_queue(app); spin_unlock_bh(&app->lock); From 0dbffbb5335a1e3aa6855e4ee317e25e669dd302 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Tue, 29 Jun 2021 07:12:45 -0700 Subject: [PATCH 042/714] net: annotate data race around sk_ll_usec sk_ll_usec is read locklessly from sk_can_busy_loop() while another thread can change its value in sock_setsockopt() This is correct but needs annotations. BUG: KCSAN: data-race in __skb_try_recv_datagram / sock_setsockopt write to 0xffff88814eb5f904 of 4 bytes by task 14011 on cpu 0: sock_setsockopt+0x1287/0x2090 net/core/sock.c:1175 __sys_setsockopt+0x14f/0x200 net/socket.c:2100 __do_sys_setsockopt net/socket.c:2115 [inline] __se_sys_setsockopt net/socket.c:2112 [inline] __x64_sys_setsockopt+0x62/0x70 net/socket.c:2112 do_syscall_64+0x4a/0x90 arch/x86/entry/common.c:47 entry_SYSCALL_64_after_hwframe+0x44/0xae read to 0xffff88814eb5f904 of 4 bytes by task 14001 on cpu 1: sk_can_busy_loop include/net/busy_poll.h:41 [inline] __skb_try_recv_datagram+0x14f/0x320 net/core/datagram.c:273 unix_dgram_recvmsg+0x14c/0x870 net/unix/af_unix.c:2101 unix_seqpacket_recvmsg+0x5a/0x70 net/unix/af_unix.c:2067 ____sys_recvmsg+0x15d/0x310 include/linux/uio.h:244 ___sys_recvmsg net/socket.c:2598 [inline] do_recvmmsg+0x35c/0x9f0 net/socket.c:2692 __sys_recvmmsg net/socket.c:2771 [inline] __do_sys_recvmmsg net/socket.c:2794 [inline] __se_sys_recvmmsg net/socket.c:2787 [inline] __x64_sys_recvmmsg+0xcf/0x150 net/socket.c:2787 do_syscall_64+0x4a/0x90 arch/x86/entry/common.c:47 entry_SYSCALL_64_after_hwframe+0x44/0xae value changed: 0x00000000 -> 0x00000101 Reported by Kernel Concurrency Sanitizer on: CPU: 1 PID: 14001 Comm: syz-executor.3 Not tainted 5.13.0-syzkaller #0 Hardware name: Google Google Compute Engine/Google Compute Engine, BIOS Google 01/01/2011 Signed-off-by: Eric Dumazet Reported-by: syzbot Signed-off-by: David S. Miller --- include/net/busy_poll.h | 2 +- net/core/sock.c | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/include/net/busy_poll.h b/include/net/busy_poll.h index 73af4a64a59996..40296ed976a977 100644 --- a/include/net/busy_poll.h +++ b/include/net/busy_poll.h @@ -38,7 +38,7 @@ static inline bool net_busy_loop_on(void) static inline bool sk_can_busy_loop(const struct sock *sk) { - return sk->sk_ll_usec && !signal_pending(current); + return READ_ONCE(sk->sk_ll_usec) && !signal_pending(current); } bool sk_busy_loop_end(void *p, unsigned long start_time); diff --git a/net/core/sock.c b/net/core/sock.c index ba1c0f75cd45b4..dd9599656c40d7 100644 --- a/net/core/sock.c +++ b/net/core/sock.c @@ -1201,7 +1201,7 @@ int sock_setsockopt(struct socket *sock, int level, int optname, if (val < 0) ret = -EINVAL; else - sk->sk_ll_usec = val; + WRITE_ONCE(sk->sk_ll_usec, val); } break; case SO_PREFER_BUSY_POLL: From 873a1e3d207ae587a7a1cc1d84545146b449ea5d Mon Sep 17 00:00:00 2001 From: Harman Kalra Date: Tue, 29 Jun 2021 22:30:04 +0530 Subject: [PATCH 043/714] octeontx2-af: cn10k: Setting up lmtst map table Introducing a new mailbox to support updating lmt entries and common lmt base address scheme i.e. multiple pcifuncs can share lmt region to reduce L1 cache pressure for application. Parameters passed to mailbox includes the primary pcifunc value whose lmt regions will be shared by other secondary pcifuncs. Here secondary pcifunc will be the one who is calling the mailbox. For example: By default each pcifunc has its own LMT base address: PCIFUNC1 LMT_BASE_ADDR A PCIFUNC2 LMT_BASE_ADDR B PCIFUNC3 LMT_BASE_ADDR C PCIFUNC4 LMT_BASE_ADDR D Application will choose PCIFUNC1 as base/primary pcifunc and as and when other pcifunc(secondary pcifuncs) gets probed, this mailbox will be called and LMTST table will be updated as: PCIFUNC1 LMT_BASE_ADDR A PCIFUNC2 LMT_BASE_ADDR A PCIFUNC3 LMT_BASE_ADDR A PCIFUNC4 LMT_BASE_ADDR A On FLR lmtst map table gets resetted to the default lmt base addresses for all secondary pcifuncs. Signed-off-by: Harman Kalra Signed-off-by: Geetha sowjanya Signed-off-by: Sunil Goutham Signed-off-by: David S. Miller --- .../net/ethernet/marvell/octeontx2/af/mbox.h | 7 + .../net/ethernet/marvell/octeontx2/af/rvu.c | 1 + .../net/ethernet/marvell/octeontx2/af/rvu.h | 4 + .../ethernet/marvell/octeontx2/af/rvu_cn10k.c | 140 ++++++++++++++++++ .../ethernet/marvell/octeontx2/af/rvu_reg.h | 5 + .../marvell/octeontx2/af/rvu_struct.h | 3 +- 6 files changed, 159 insertions(+), 1 deletion(-) diff --git a/drivers/net/ethernet/marvell/octeontx2/af/mbox.h b/drivers/net/ethernet/marvell/octeontx2/af/mbox.h index 770d86262838fe..638db868125aea 100644 --- a/drivers/net/ethernet/marvell/octeontx2/af/mbox.h +++ b/drivers/net/ethernet/marvell/octeontx2/af/mbox.h @@ -134,6 +134,8 @@ M(MSIX_OFFSET, 0x005, msix_offset, msg_req, msix_offset_rsp) \ M(VF_FLR, 0x006, vf_flr, msg_req, msg_rsp) \ M(PTP_OP, 0x007, ptp_op, ptp_req, ptp_rsp) \ M(GET_HW_CAP, 0x008, get_hw_cap, msg_req, get_hw_cap_rsp) \ +M(LMTST_TBL_SETUP, 0x00a, lmtst_tbl_setup, lmtst_tbl_setup_req, \ + msg_rsp) \ M(SET_VF_PERM, 0x00b, set_vf_perm, set_vf_perm, msg_rsp) \ /* CGX mbox IDs (range 0x200 - 0x3FF) */ \ M(CGX_START_RXTX, 0x200, cgx_start_rxtx, msg_req, msg_rsp) \ @@ -1278,6 +1280,11 @@ struct set_vf_perm { u64 flags; }; +struct lmtst_tbl_setup_req { + struct mbox_msghdr hdr; + u16 base_pcifunc; +}; + /* CPT mailbox error codes * Range 901 - 1000. */ diff --git a/drivers/net/ethernet/marvell/octeontx2/af/rvu.c b/drivers/net/ethernet/marvell/octeontx2/af/rvu.c index 0b092949d7aced..10cddf1ac7b9ef 100644 --- a/drivers/net/ethernet/marvell/octeontx2/af/rvu.c +++ b/drivers/net/ethernet/marvell/octeontx2/af/rvu.c @@ -2333,6 +2333,7 @@ static void __rvu_flr_handler(struct rvu *rvu, u16 pcifunc) rvu_blklf_teardown(rvu, pcifunc, BLKADDR_SSOW); rvu_blklf_teardown(rvu, pcifunc, BLKADDR_SSO); rvu_blklf_teardown(rvu, pcifunc, BLKADDR_NPA); + rvu_reset_lmt_map_tbl(rvu, pcifunc); rvu_detach_rsrcs(rvu, NULL, pcifunc); mutex_unlock(&rvu->flr_lock); } diff --git a/drivers/net/ethernet/marvell/octeontx2/af/rvu.h b/drivers/net/ethernet/marvell/octeontx2/af/rvu.h index 9e5d9ba6f01e01..3c0a7e981f72b5 100644 --- a/drivers/net/ethernet/marvell/octeontx2/af/rvu.h +++ b/drivers/net/ethernet/marvell/octeontx2/af/rvu.h @@ -243,6 +243,7 @@ struct rvu_pfvf { u8 nix_blkaddr; /* BLKADDR_NIX0/1 assigned to this PF */ u8 nix_rx_intf; /* NIX0_RX/NIX1_RX interface to NPC */ u8 nix_tx_intf; /* NIX0_TX/NIX1_TX interface to NPC */ + u64 lmt_base_addr; /* Preseving the pcifunc's lmtst base addr*/ unsigned long flags; }; @@ -754,6 +755,9 @@ int rvu_cpt_lf_teardown(struct rvu *rvu, u16 pcifunc, int lf, int slot); int rvu_set_channels_base(struct rvu *rvu); void rvu_program_channels(struct rvu *rvu); +/* CN10K RVU - LMT*/ +void rvu_reset_lmt_map_tbl(struct rvu *rvu, u16 pcifunc); + #ifdef CONFIG_DEBUG_FS void rvu_dbg_init(struct rvu *rvu); void rvu_dbg_exit(struct rvu *rvu); diff --git a/drivers/net/ethernet/marvell/octeontx2/af/rvu_cn10k.c b/drivers/net/ethernet/marvell/octeontx2/af/rvu_cn10k.c index 7d9e71c6965fb0..87f56e1f32e327 100644 --- a/drivers/net/ethernet/marvell/octeontx2/af/rvu_cn10k.c +++ b/drivers/net/ethernet/marvell/octeontx2/af/rvu_cn10k.c @@ -10,6 +10,146 @@ #include "cgx.h" #include "rvu_reg.h" +/* RVU LMTST */ +#define LMT_TBL_OP_READ 0 +#define LMT_TBL_OP_WRITE 1 +#define LMT_MAP_TABLE_SIZE (128 * 1024) +#define LMT_MAPTBL_ENTRY_SIZE 16 + +/* Function to perform operations (read/write) on lmtst map table */ +static int lmtst_map_table_ops(struct rvu *rvu, u32 index, u64 *val, + int lmt_tbl_op) +{ + void __iomem *lmt_map_base; + u64 tbl_base; + + tbl_base = rvu_read64(rvu, BLKADDR_APR, APR_AF_LMT_MAP_BASE); + + lmt_map_base = ioremap_wc(tbl_base, LMT_MAP_TABLE_SIZE); + if (!lmt_map_base) { + dev_err(rvu->dev, "Failed to setup lmt map table mapping!!\n"); + return -ENOMEM; + } + + if (lmt_tbl_op == LMT_TBL_OP_READ) { + *val = readq(lmt_map_base + index); + } else { + writeq((*val), (lmt_map_base + index)); + /* Flushing the AP interceptor cache to make APR_LMT_MAP_ENTRY_S + * changes effective. Write 1 for flush and read is being used as a + * barrier and sets up a data dependency. Write to 0 after a write + * to 1 to complete the flush. + */ + rvu_write64(rvu, BLKADDR_APR, APR_AF_LMT_CTL, BIT_ULL(0)); + rvu_read64(rvu, BLKADDR_APR, APR_AF_LMT_CTL); + rvu_write64(rvu, BLKADDR_APR, APR_AF_LMT_CTL, 0x00); + } + + iounmap(lmt_map_base); + return 0; +} + +static u32 rvu_get_lmtst_tbl_index(struct rvu *rvu, u16 pcifunc) +{ + return ((rvu_get_pf(pcifunc) * rvu->hw->total_vfs) + + (pcifunc & RVU_PFVF_FUNC_MASK)) * LMT_MAPTBL_ENTRY_SIZE; +} + +int rvu_mbox_handler_lmtst_tbl_setup(struct rvu *rvu, + struct lmtst_tbl_setup_req *req, + struct msg_rsp *rsp) +{ + struct rvu_pfvf *pfvf = rvu_get_pfvf(rvu, req->hdr.pcifunc); + u32 pri_tbl_idx, sec_tbl_idx; + int err = 0; + u64 val; + + /* Reconfiguring lmtst map table in lmt region shared mode i.e. make + * multiple PF_FUNCs to share an LMTLINE region, so primary/base + * pcifunc (which is passed as an argument to mailbox) is the one + * whose lmt base address will be shared among other secondary + * pcifunc (will be the one who is calling this mailbox). + */ + if (req->base_pcifunc) { + /* Calculating the LMT table index equivalent to primary + * pcifunc. + */ + pri_tbl_idx = rvu_get_lmtst_tbl_index(rvu, req->base_pcifunc); + + /* Truncating secondary pcifunc to calculate the LMT table index + * equivalent to secondary pcifunc. + */ + sec_tbl_idx = rvu_get_lmtst_tbl_index(rvu, req->hdr.pcifunc); + /* Read the base lmt addr of the secondary pcifunc */ + err = lmtst_map_table_ops(rvu, sec_tbl_idx, &val, + LMT_TBL_OP_READ); + if (err) { + dev_err(rvu->dev, + "Failed to read LMT map table: index 0x%x err %d\n", + sec_tbl_idx, err); + goto error; + } + + /* Storing the seondary's lmt base address as this needs to be + * reverted in FLR. Also making sure this default value doesn't + * get overwritten on multiple calls to this mailbox. + */ + if (!pfvf->lmt_base_addr) + pfvf->lmt_base_addr = val; + + /* Read the base lmt addr of the primary pcifunc */ + err = lmtst_map_table_ops(rvu, pri_tbl_idx, &val, + LMT_TBL_OP_READ); + if (err) { + dev_err(rvu->dev, + "Failed to read LMT map table: index 0x%x err %d\n", + pri_tbl_idx, err); + goto error; + } + + /* Update the base lmt addr of secondary with primary's base + * lmt addr. + */ + err = lmtst_map_table_ops(rvu, sec_tbl_idx, &val, + LMT_TBL_OP_WRITE); + if (err) { + dev_err(rvu->dev, + "Failed to update LMT map table: index 0x%x err %d\n", + sec_tbl_idx, err); + goto error; + } + } + +error: + return err; +} + +/* Resetting the lmtst map table to original base addresses */ +void rvu_reset_lmt_map_tbl(struct rvu *rvu, u16 pcifunc) +{ + struct rvu_pfvf *pfvf = rvu_get_pfvf(rvu, pcifunc); + u32 tbl_idx; + int err; + + if (is_rvu_otx2(rvu)) + return; + + if (pfvf->lmt_base_addr) { + /* This corresponds to lmt map table index */ + tbl_idx = rvu_get_lmtst_tbl_index(rvu, pcifunc); + /* Reverting back original lmt base addr for respective + * pcifunc. + */ + err = lmtst_map_table_ops(rvu, tbl_idx, &pfvf->lmt_base_addr, + LMT_TBL_OP_WRITE); + if (err) + dev_err(rvu->dev, + "Failed to update LMT map table: index 0x%x err %d\n", + tbl_idx, err); + pfvf->lmt_base_addr = 0; + } +} + int rvu_set_channels_base(struct rvu *rvu) { struct rvu_hwinfo *hw = rvu->hw; diff --git a/drivers/net/ethernet/marvell/octeontx2/af/rvu_reg.h b/drivers/net/ethernet/marvell/octeontx2/af/rvu_reg.h index 76837d5e19c63f..61bafe956aaebc 100644 --- a/drivers/net/ethernet/marvell/octeontx2/af/rvu_reg.h +++ b/drivers/net/ethernet/marvell/octeontx2/af/rvu_reg.h @@ -692,4 +692,9 @@ #define LBK_LINK_CFG_ID_MASK GENMASK_ULL(11, 6) #define LBK_LINK_CFG_BASE_MASK GENMASK_ULL(5, 0) +/* APR */ +#define APR_AF_LMT_CFG (0x000ull) +#define APR_AF_LMT_MAP_BASE (0x008ull) +#define APR_AF_LMT_CTL (0x010ull) + #endif /* RVU_REG_H */ diff --git a/drivers/net/ethernet/marvell/octeontx2/af/rvu_struct.h b/drivers/net/ethernet/marvell/octeontx2/af/rvu_struct.h index 14aa8e37ea4139..5bbe6727d11d0b 100644 --- a/drivers/net/ethernet/marvell/octeontx2/af/rvu_struct.h +++ b/drivers/net/ethernet/marvell/octeontx2/af/rvu_struct.h @@ -35,7 +35,8 @@ enum rvu_block_addr_e { BLKADDR_NDC_NPA0 = 0xeULL, BLKADDR_NDC_NIX1_RX = 0x10ULL, BLKADDR_NDC_NIX1_TX = 0x11ULL, - BLK_COUNT = 0x12ULL, + BLKADDR_APR = 0x16ULL, + BLK_COUNT = 0x17ULL, }; /* RVU Block Type Enumeration */ From 893ae97214c385be02f8ec097298cc48c7f0d905 Mon Sep 17 00:00:00 2001 From: Geetha sowjanya Date: Tue, 29 Jun 2021 22:30:05 +0530 Subject: [PATCH 044/714] octeontx2-af: cn10k: Support configurable LMTST regions This patch extends the lmtst_tbl_setup_req mbox to support run time LMTST configuration. RVU PF/VF and DPDK/ODP allocates a LMT region, creates a translation entry for a device via VFIO IOCTLs. This IOVA is shared with AF through above mbox. AF then uses RVU_SMMU transulation Widget and gets PA for the IOVA and updates the LMTtable entry for that device. Signed-off-by: Geetha sowjanya Signed-off-by: Sunil Kovvuri Goutham Signed-off-by: David S. Miller --- .../net/ethernet/marvell/octeontx2/af/mbox.h | 3 + .../ethernet/marvell/octeontx2/af/rvu_cn10k.c | 130 +++++++++++++----- .../ethernet/marvell/octeontx2/af/rvu_reg.h | 5 + 3 files changed, 103 insertions(+), 35 deletions(-) diff --git a/drivers/net/ethernet/marvell/octeontx2/af/mbox.h b/drivers/net/ethernet/marvell/octeontx2/af/mbox.h index 638db868125aea..9672cbf8a90a69 100644 --- a/drivers/net/ethernet/marvell/octeontx2/af/mbox.h +++ b/drivers/net/ethernet/marvell/octeontx2/af/mbox.h @@ -1283,6 +1283,9 @@ struct set_vf_perm { struct lmtst_tbl_setup_req { struct mbox_msghdr hdr; u16 base_pcifunc; + u8 use_local_lmt_region; + u64 lmt_iova; + u64 rsvd[4]; }; /* CPT mailbox error codes diff --git a/drivers/net/ethernet/marvell/octeontx2/af/rvu_cn10k.c b/drivers/net/ethernet/marvell/octeontx2/af/rvu_cn10k.c index 87f56e1f32e327..8d48b64485c69e 100644 --- a/drivers/net/ethernet/marvell/octeontx2/af/rvu_cn10k.c +++ b/drivers/net/ethernet/marvell/octeontx2/af/rvu_cn10k.c @@ -55,14 +55,101 @@ static u32 rvu_get_lmtst_tbl_index(struct rvu *rvu, u16 pcifunc) (pcifunc & RVU_PFVF_FUNC_MASK)) * LMT_MAPTBL_ENTRY_SIZE; } +static int rvu_get_lmtaddr(struct rvu *rvu, u16 pcifunc, + u64 iova, u64 *lmt_addr) +{ + u64 pa, val, pf; + int err; + + if (!iova) { + dev_err(rvu->dev, "%s Requested Null address for transulation\n", __func__); + return -EINVAL; + } + + rvu_write64(rvu, BLKADDR_RVUM, RVU_AF_SMMU_ADDR_REQ, iova); + pf = rvu_get_pf(pcifunc) & 0x1F; + val = BIT_ULL(63) | BIT_ULL(14) | BIT_ULL(13) | pf << 8 | + ((pcifunc & RVU_PFVF_FUNC_MASK) & 0xFF); + rvu_write64(rvu, BLKADDR_RVUM, RVU_AF_SMMU_TXN_REQ, val); + + err = rvu_poll_reg(rvu, BLKADDR_RVUM, RVU_AF_SMMU_ADDR_RSP_STS, BIT_ULL(0), false); + if (err) { + dev_err(rvu->dev, "%s LMTLINE iova transulation failed\n", __func__); + return err; + } + val = rvu_read64(rvu, BLKADDR_RVUM, RVU_AF_SMMU_ADDR_RSP_STS); + if (val & ~0x1ULL) { + dev_err(rvu->dev, "%s LMTLINE iova transulation failed err:%llx\n", __func__, val); + return -EIO; + } + /* PA[51:12] = RVU_AF_SMMU_TLN_FLIT1[60:21] + * PA[11:0] = IOVA[11:0] + */ + pa = rvu_read64(rvu, BLKADDR_RVUM, RVU_AF_SMMU_TLN_FLIT1) >> 21; + pa &= GENMASK_ULL(39, 0); + *lmt_addr = (pa << 12) | (iova & 0xFFF); + + return 0; +} + +static int rvu_update_lmtaddr(struct rvu *rvu, u16 pcifunc, u64 lmt_addr) +{ + struct rvu_pfvf *pfvf = rvu_get_pfvf(rvu, pcifunc); + u32 tbl_idx; + int err = 0; + u64 val; + + /* Read the current lmt addr of pcifunc */ + tbl_idx = rvu_get_lmtst_tbl_index(rvu, pcifunc); + err = lmtst_map_table_ops(rvu, tbl_idx, &val, LMT_TBL_OP_READ); + if (err) { + dev_err(rvu->dev, + "Failed to read LMT map table: index 0x%x err %d\n", + tbl_idx, err); + return err; + } + + /* Storing the seondary's lmt base address as this needs to be + * reverted in FLR. Also making sure this default value doesn't + * get overwritten on multiple calls to this mailbox. + */ + if (!pfvf->lmt_base_addr) + pfvf->lmt_base_addr = val; + + /* Update the LMT table with new addr */ + err = lmtst_map_table_ops(rvu, tbl_idx, &lmt_addr, LMT_TBL_OP_WRITE); + if (err) { + dev_err(rvu->dev, + "Failed to update LMT map table: index 0x%x err %d\n", + tbl_idx, err); + return err; + } + return 0; +} + int rvu_mbox_handler_lmtst_tbl_setup(struct rvu *rvu, struct lmtst_tbl_setup_req *req, struct msg_rsp *rsp) { - struct rvu_pfvf *pfvf = rvu_get_pfvf(rvu, req->hdr.pcifunc); - u32 pri_tbl_idx, sec_tbl_idx; + u64 lmt_addr, val; + u32 pri_tbl_idx; int err = 0; - u64 val; + + /* Check if PF_FUNC wants to use it's own local memory as LMTLINE + * region, if so, convert that IOVA to physical address and + * populate LMT table with that address + */ + if (req->use_local_lmt_region) { + err = rvu_get_lmtaddr(rvu, req->hdr.pcifunc, + req->lmt_iova, &lmt_addr); + if (err < 0) + return err; + + /* Update the lmt addr for this PFFUNC in the LMT table */ + err = rvu_update_lmtaddr(rvu, req->hdr.pcifunc, lmt_addr); + if (err) + return err; + } /* Reconfiguring lmtst map table in lmt region shared mode i.e. make * multiple PF_FUNCs to share an LMTLINE region, so primary/base @@ -76,27 +163,6 @@ int rvu_mbox_handler_lmtst_tbl_setup(struct rvu *rvu, */ pri_tbl_idx = rvu_get_lmtst_tbl_index(rvu, req->base_pcifunc); - /* Truncating secondary pcifunc to calculate the LMT table index - * equivalent to secondary pcifunc. - */ - sec_tbl_idx = rvu_get_lmtst_tbl_index(rvu, req->hdr.pcifunc); - /* Read the base lmt addr of the secondary pcifunc */ - err = lmtst_map_table_ops(rvu, sec_tbl_idx, &val, - LMT_TBL_OP_READ); - if (err) { - dev_err(rvu->dev, - "Failed to read LMT map table: index 0x%x err %d\n", - sec_tbl_idx, err); - goto error; - } - - /* Storing the seondary's lmt base address as this needs to be - * reverted in FLR. Also making sure this default value doesn't - * get overwritten on multiple calls to this mailbox. - */ - if (!pfvf->lmt_base_addr) - pfvf->lmt_base_addr = val; - /* Read the base lmt addr of the primary pcifunc */ err = lmtst_map_table_ops(rvu, pri_tbl_idx, &val, LMT_TBL_OP_READ); @@ -104,24 +170,18 @@ int rvu_mbox_handler_lmtst_tbl_setup(struct rvu *rvu, dev_err(rvu->dev, "Failed to read LMT map table: index 0x%x err %d\n", pri_tbl_idx, err); - goto error; + return err; } /* Update the base lmt addr of secondary with primary's base * lmt addr. */ - err = lmtst_map_table_ops(rvu, sec_tbl_idx, &val, - LMT_TBL_OP_WRITE); - if (err) { - dev_err(rvu->dev, - "Failed to update LMT map table: index 0x%x err %d\n", - sec_tbl_idx, err); - goto error; - } + err = rvu_update_lmtaddr(rvu, req->hdr.pcifunc, val); + if (err) + return err; } -error: - return err; + return 0; } /* Resetting the lmtst map table to original base addresses */ diff --git a/drivers/net/ethernet/marvell/octeontx2/af/rvu_reg.h b/drivers/net/ethernet/marvell/octeontx2/af/rvu_reg.h index 61bafe956aaebc..8b01ef6e2c9971 100644 --- a/drivers/net/ethernet/marvell/octeontx2/af/rvu_reg.h +++ b/drivers/net/ethernet/marvell/octeontx2/af/rvu_reg.h @@ -49,6 +49,11 @@ #define RVU_AF_PFX_VF_BAR4_ADDR (0x5400 | (a) << 4) #define RVU_AF_PFX_VF_BAR4_CFG (0x5600 | (a) << 4) #define RVU_AF_PFX_LMTLINE_ADDR (0x5800 | (a) << 4) +#define RVU_AF_SMMU_ADDR_REQ (0x6000) +#define RVU_AF_SMMU_TXN_REQ (0x6008) +#define RVU_AF_SMMU_ADDR_RSP_STS (0x6010) +#define RVU_AF_SMMU_ADDR_TLN (0x6018) +#define RVU_AF_SMMU_TLN_FLIT1 (0x6030) /* Admin function's privileged PF/VF registers */ #define RVU_PRIV_CONST (0x8000000) From 5c0512072f6517326d9fba083c4467f173ddd984 Mon Sep 17 00:00:00 2001 From: Geetha sowjanya Date: Tue, 29 Jun 2021 22:30:06 +0530 Subject: [PATCH 045/714] octeontx2-pf: cn10k: Use runtime allocated LMTLINE region The current driver uses static LMTST region allocated by firmware. This memory gets populated as PF/VF BAR2. RVU PF/VF driver ioremap the memory as device memory for NIX/NPA operation. Since the memory is mapped as device memory we see performance degration. To address this issue this patch implements runtime memory allocation. RVU PF/VF allocates memory during device probe and share the base address with RVU AF. RVU AF then configure the LMT MAP table accordingly. Signed-off-by: Geetha sowjanya Signed-off-by: Sunil Kovvuri Goutham Signed-off-by: David S. Miller --- .../ethernet/marvell/octeontx2/nic/cn10k.c | 87 ++++++++----------- .../ethernet/marvell/octeontx2/nic/cn10k.h | 3 +- .../marvell/octeontx2/nic/otx2_common.h | 7 +- .../ethernet/marvell/octeontx2/nic/otx2_pf.c | 17 ++-- .../marvell/octeontx2/nic/otx2_txrx.h | 1 + .../ethernet/marvell/octeontx2/nic/otx2_vf.c | 12 ++- 6 files changed, 54 insertions(+), 73 deletions(-) diff --git a/drivers/net/ethernet/marvell/octeontx2/nic/cn10k.c b/drivers/net/ethernet/marvell/octeontx2/nic/cn10k.c index 1b08896b46d2ec..184de946628682 100644 --- a/drivers/net/ethernet/marvell/octeontx2/nic/cn10k.c +++ b/drivers/net/ethernet/marvell/octeontx2/nic/cn10k.c @@ -22,69 +22,52 @@ static struct dev_hw_ops cn10k_hw_ops = { .refill_pool_ptrs = cn10k_refill_pool_ptrs, }; -int cn10k_pf_lmtst_init(struct otx2_nic *pf) +int cn10k_lmtst_init(struct otx2_nic *pfvf) { - int size, num_lines; - u64 base; - if (!test_bit(CN10K_LMTST, &pf->hw.cap_flag)) { - pf->hw_ops = &otx2_hw_ops; + struct lmtst_tbl_setup_req *req; + int qcount, err; + + if (!test_bit(CN10K_LMTST, &pfvf->hw.cap_flag)) { + pfvf->hw_ops = &otx2_hw_ops; return 0; } - pf->hw_ops = &cn10k_hw_ops; - base = pci_resource_start(pf->pdev, PCI_MBOX_BAR_NUM) + - (MBOX_SIZE * (pf->total_vfs + 1)); - - size = pci_resource_len(pf->pdev, PCI_MBOX_BAR_NUM) - - (MBOX_SIZE * (pf->total_vfs + 1)); - - pf->hw.lmt_base = ioremap(base, size); + pfvf->hw_ops = &cn10k_hw_ops; + qcount = pfvf->hw.max_queues; + /* LMTST lines allocation + * qcount = num_online_cpus(); + * NPA = TX + RX + XDP. + * NIX = TX * 32 (For Burst SQE flush). + */ + pfvf->tot_lmt_lines = (qcount * 3) + (qcount * 32); + pfvf->npa_lmt_lines = qcount * 3; + pfvf->nix_lmt_size = LMT_BURST_SIZE * LMT_LINE_SIZE; - if (!pf->hw.lmt_base) { - dev_err(pf->dev, "Unable to map PF LMTST region\n"); + mutex_lock(&pfvf->mbox.lock); + req = otx2_mbox_alloc_msg_lmtst_tbl_setup(&pfvf->mbox); + if (!req) { + mutex_unlock(&pfvf->mbox.lock); return -ENOMEM; } - /* FIXME: Get the num of LMTST lines from LMT table */ - pf->tot_lmt_lines = size / LMT_LINE_SIZE; - num_lines = (pf->tot_lmt_lines - NIX_LMTID_BASE) / - pf->hw.tx_queues; - /* Number of LMT lines per SQ queues */ - pf->nix_lmt_lines = num_lines > 32 ? 32 : num_lines; - - pf->nix_lmt_size = pf->nix_lmt_lines * LMT_LINE_SIZE; - return 0; -} + req->use_local_lmt_region = true; -int cn10k_vf_lmtst_init(struct otx2_nic *vf) -{ - int size, num_lines; - - if (!test_bit(CN10K_LMTST, &vf->hw.cap_flag)) { - vf->hw_ops = &otx2_hw_ops; - return 0; + err = qmem_alloc(pfvf->dev, &pfvf->dync_lmt, pfvf->tot_lmt_lines, + LMT_LINE_SIZE); + if (err) { + mutex_unlock(&pfvf->mbox.lock); + return err; } + pfvf->hw.lmt_base = (u64 *)pfvf->dync_lmt->base; + req->lmt_iova = (u64)pfvf->dync_lmt->iova; - vf->hw_ops = &cn10k_hw_ops; - size = pci_resource_len(vf->pdev, PCI_MBOX_BAR_NUM); - vf->hw.lmt_base = ioremap_wc(pci_resource_start(vf->pdev, - PCI_MBOX_BAR_NUM), - size); - if (!vf->hw.lmt_base) { - dev_err(vf->dev, "Unable to map VF LMTST region\n"); - return -ENOMEM; - } + err = otx2_sync_mbox_msg(&pfvf->mbox); + mutex_unlock(&pfvf->mbox.lock); - vf->tot_lmt_lines = size / LMT_LINE_SIZE; - /* LMTST lines per SQ */ - num_lines = (vf->tot_lmt_lines - NIX_LMTID_BASE) / - vf->hw.tx_queues; - vf->nix_lmt_lines = num_lines > 32 ? 32 : num_lines; - vf->nix_lmt_size = vf->nix_lmt_lines * LMT_LINE_SIZE; return 0; } -EXPORT_SYMBOL(cn10k_vf_lmtst_init); +EXPORT_SYMBOL(cn10k_lmtst_init); int cn10k_sq_aq_init(void *dev, u16 qidx, u16 sqb_aura) { @@ -93,9 +76,11 @@ int cn10k_sq_aq_init(void *dev, u16 qidx, u16 sqb_aura) struct otx2_snd_queue *sq; sq = &pfvf->qset.sq[qidx]; - sq->lmt_addr = (__force u64 *)((u64)pfvf->hw.nix_lmt_base + + sq->lmt_addr = (u64 *)((u64)pfvf->hw.nix_lmt_base + (qidx * pfvf->nix_lmt_size)); + sq->lmt_id = pfvf->npa_lmt_lines + (qidx * LMT_BURST_SIZE); + /* Get memory to put this msg */ aq = otx2_mbox_alloc_msg_nix_cn10k_aq_enq(&pfvf->mbox); if (!aq) @@ -158,15 +143,13 @@ void cn10k_refill_pool_ptrs(void *dev, struct otx2_cq_queue *cq) void cn10k_sqe_flush(void *dev, struct otx2_snd_queue *sq, int size, int qidx) { - struct otx2_nic *pfvf = dev; - int lmt_id = NIX_LMTID_BASE + (qidx * pfvf->nix_lmt_lines); u64 val = 0, tar_addr = 0; /* FIXME: val[0:10] LMT_ID. * [12:15] no of LMTST - 1 in the burst. * [19:63] data size of each LMTST in the burst except first. */ - val = (lmt_id & 0x7FF); + val = (sq->lmt_id & 0x7FF); /* Target address for LMTST flush tells HW how many 128bit * words are present. * tar_addr[6:4] size of first LMTST - 1 in units of 128b. diff --git a/drivers/net/ethernet/marvell/octeontx2/nic/cn10k.h b/drivers/net/ethernet/marvell/octeontx2/nic/cn10k.h index 71292a4cf1f3cc..1a1ae334477da2 100644 --- a/drivers/net/ethernet/marvell/octeontx2/nic/cn10k.h +++ b/drivers/net/ethernet/marvell/octeontx2/nic/cn10k.h @@ -12,8 +12,7 @@ void cn10k_refill_pool_ptrs(void *dev, struct otx2_cq_queue *cq); void cn10k_sqe_flush(void *dev, struct otx2_snd_queue *sq, int size, int qidx); int cn10k_sq_aq_init(void *dev, u16 qidx, u16 sqb_aura); -int cn10k_pf_lmtst_init(struct otx2_nic *pf); -int cn10k_vf_lmtst_init(struct otx2_nic *vf); +int cn10k_lmtst_init(struct otx2_nic *pfvf); int cn10k_free_all_ipolicers(struct otx2_nic *pfvf); int cn10k_alloc_matchall_ipolicer(struct otx2_nic *pfvf); int cn10k_free_matchall_ipolicer(struct otx2_nic *pfvf); diff --git a/drivers/net/ethernet/marvell/octeontx2/nic/otx2_common.h b/drivers/net/ethernet/marvell/octeontx2/nic/otx2_common.h index 234b330f318355..20a9c69f020fc4 100644 --- a/drivers/net/ethernet/marvell/octeontx2/nic/otx2_common.h +++ b/drivers/net/ethernet/marvell/octeontx2/nic/otx2_common.h @@ -218,8 +218,8 @@ struct otx2_hw { unsigned long cap_flag; #define LMT_LINE_SIZE 128 -#define NIX_LMTID_BASE 72 /* RX + TX + XDP */ - void __iomem *lmt_base; +#define LMT_BURST_SIZE 32 /* 32 LMTST lines for burst SQE flush */ + u64 *lmt_base; u64 *npa_lmt_base; u64 *nix_lmt_base; }; @@ -363,8 +363,9 @@ struct otx2_nic { /* Block address of NIX either BLKADDR_NIX0 or BLKADDR_NIX1 */ int nix_blkaddr; /* LMTST Lines info */ + struct qmem *dync_lmt; u16 tot_lmt_lines; - u16 nix_lmt_lines; + u16 npa_lmt_lines; u32 nix_lmt_size; struct otx2_ptp *ptp; diff --git a/drivers/net/ethernet/marvell/octeontx2/nic/otx2_pf.c b/drivers/net/ethernet/marvell/octeontx2/nic/otx2_pf.c index 59912f73417b8f..088c28df849d09 100644 --- a/drivers/net/ethernet/marvell/octeontx2/nic/otx2_pf.c +++ b/drivers/net/ethernet/marvell/octeontx2/nic/otx2_pf.c @@ -1533,10 +1533,10 @@ int otx2_open(struct net_device *netdev) if (test_bit(CN10K_LMTST, &pf->hw.cap_flag)) { /* Reserve LMT lines for NPA AURA batch free */ - pf->hw.npa_lmt_base = (__force u64 *)pf->hw.lmt_base; + pf->hw.npa_lmt_base = pf->hw.lmt_base; /* Reserve LMT lines for NIX TX */ - pf->hw.nix_lmt_base = (__force u64 *)((u64)pf->hw.npa_lmt_base + - (NIX_LMTID_BASE * LMT_LINE_SIZE)); + pf->hw.nix_lmt_base = (u64 *)((u64)pf->hw.npa_lmt_base + + (pf->npa_lmt_lines * LMT_LINE_SIZE)); } err = otx2_init_hw_resources(pf); @@ -2526,7 +2526,7 @@ static int otx2_probe(struct pci_dev *pdev, const struct pci_device_id *id) if (err) goto err_detach_rsrc; - err = cn10k_pf_lmtst_init(pf); + err = cn10k_lmtst_init(pf); if (err) goto err_detach_rsrc; @@ -2630,8 +2630,8 @@ static int otx2_probe(struct pci_dev *pdev, const struct pci_device_id *id) err_ptp_destroy: otx2_ptp_destroy(pf); err_detach_rsrc: - if (hw->lmt_base) - iounmap(hw->lmt_base); + if (test_bit(CN10K_LMTST, &pf->hw.cap_flag)) + qmem_free(pf->dev, pf->dync_lmt); otx2_detach_resources(&pf->mbox); err_disable_mbox_intr: otx2_disable_mbox_intr(pf); @@ -2772,9 +2772,8 @@ static void otx2_remove(struct pci_dev *pdev) otx2_mcam_flow_del(pf); otx2_shutdown_tc(pf); otx2_detach_resources(&pf->mbox); - if (pf->hw.lmt_base) - iounmap(pf->hw.lmt_base); - + if (test_bit(CN10K_LMTST, &pf->hw.cap_flag)) + qmem_free(pf->dev, pf->dync_lmt); otx2_disable_mbox_intr(pf); otx2_pfaf_mbox_destroy(pf); pci_free_irq_vectors(pf->pdev); diff --git a/drivers/net/ethernet/marvell/octeontx2/nic/otx2_txrx.h b/drivers/net/ethernet/marvell/octeontx2/nic/otx2_txrx.h index 52486c1f0973ae..2f144e2cf43613 100644 --- a/drivers/net/ethernet/marvell/octeontx2/nic/otx2_txrx.h +++ b/drivers/net/ethernet/marvell/octeontx2/nic/otx2_txrx.h @@ -83,6 +83,7 @@ struct otx2_snd_queue { u16 num_sqbs; u16 sqe_thresh; u8 sqe_per_sqb; + u32 lmt_id; u64 io_addr; u64 *aura_fc_addr; u64 *lmt_addr; diff --git a/drivers/net/ethernet/marvell/octeontx2/nic/otx2_vf.c b/drivers/net/ethernet/marvell/octeontx2/nic/otx2_vf.c index 13a908f75ba0fe..a8bee5aefec1fa 100644 --- a/drivers/net/ethernet/marvell/octeontx2/nic/otx2_vf.c +++ b/drivers/net/ethernet/marvell/octeontx2/nic/otx2_vf.c @@ -609,7 +609,7 @@ static int otx2vf_probe(struct pci_dev *pdev, const struct pci_device_id *id) if (err) goto err_detach_rsrc; - err = cn10k_vf_lmtst_init(vf); + err = cn10k_lmtst_init(vf); if (err) goto err_detach_rsrc; @@ -667,8 +667,8 @@ static int otx2vf_probe(struct pci_dev *pdev, const struct pci_device_id *id) err_unreg_netdev: unregister_netdev(netdev); err_detach_rsrc: - if (hw->lmt_base) - iounmap(hw->lmt_base); + if (test_bit(CN10K_LMTST, &vf->hw.cap_flag)) + qmem_free(vf->dev, vf->dync_lmt); otx2_detach_resources(&vf->mbox); err_disable_mbox_intr: otx2vf_disable_mbox_intr(vf); @@ -700,10 +700,8 @@ static void otx2vf_remove(struct pci_dev *pdev) destroy_workqueue(vf->otx2_wq); otx2vf_disable_mbox_intr(vf); otx2_detach_resources(&vf->mbox); - - if (vf->hw.lmt_base) - iounmap(vf->hw.lmt_base); - + if (test_bit(CN10K_LMTST, &vf->hw.cap_flag)) + qmem_free(vf->dev, vf->dync_lmt); otx2vf_vfaf_mbox_destroy(vf); pci_free_irq_vectors(vf->pdev); pci_set_drvdata(pdev, NULL); From b71d09871566a20ae8a1064e50f1e94813b18482 Mon Sep 17 00:00:00 2001 From: Vladimir Oltean Date: Tue, 29 Jun 2021 23:32:15 +0300 Subject: [PATCH 046/714] net: dsa: return -EOPNOTSUPP when driver does not implement .port_lag_join The DSA core has a layered structure, and even though we end up returning 0 (success) to user space when setting a bonding/team upper that can't be offloaded, some parts of the framework actually need to know that we couldn't offload that. For example, if dsa_switch_lag_join returns 0 as it currently does, dsa_port_lag_join has no way to tell a successful offload from a software fallback, and it will call dsa_port_bridge_join afterwards. Then we'll think we're offloading the bridge master of the LAG, when in fact we're not even offloading the LAG. In turn, this will make us set skb->offload_fwd_mark = true, which is incorrect and the bridge doesn't like it. Signed-off-by: Vladimir Oltean Reviewed-by: Florian Fainelli Signed-off-by: David S. Miller --- net/dsa/switch.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/net/dsa/switch.c b/net/dsa/switch.c index af71b863809832..24845514598201 100644 --- a/net/dsa/switch.c +++ b/net/dsa/switch.c @@ -427,7 +427,7 @@ static int dsa_switch_lag_join(struct dsa_switch *ds, info->port, info->lag, info->info); - return 0; + return -EOPNOTSUPP; } static int dsa_switch_lag_leave(struct dsa_switch *ds, @@ -440,7 +440,7 @@ static int dsa_switch_lag_leave(struct dsa_switch *ds, return ds->ops->crosschip_lag_leave(ds, info->sw_index, info->port, info->lag); - return 0; + return -EOPNOTSUPP; } static int dsa_switch_mdb_add(struct dsa_switch *ds, From 5a3c680aa2c12c90c44af383fe6882a39875ab81 Mon Sep 17 00:00:00 2001 From: Doug Berger Date: Tue, 29 Jun 2021 17:14:19 -0700 Subject: [PATCH 047/714] net: bcmgenet: ensure EXT_ENERGY_DET_MASK is clear Setting the EXT_ENERGY_DET_MASK bit allows the port energy detection logic of the internal PHY to prevent the system from sleeping. Some internal PHYs will report that energy is detected when the network interface is closed which can prevent the system from going to sleep if WoL is enabled when the interface is brought down. Since the driver does not support waking the system on this logic, this commit clears the bit whenever the internal PHY is powered up and the other logic for manipulating the bit is removed since it serves no useful function. Fixes: 1c1008c793fa ("net: bcmgenet: add main driver file") Signed-off-by: Doug Berger Acked-by: Florian Fainelli Signed-off-by: David S. Miller --- drivers/net/ethernet/broadcom/genet/bcmgenet.c | 17 ++--------------- .../net/ethernet/broadcom/genet/bcmgenet_wol.c | 6 ------ 2 files changed, 2 insertions(+), 21 deletions(-) diff --git a/drivers/net/ethernet/broadcom/genet/bcmgenet.c b/drivers/net/ethernet/broadcom/genet/bcmgenet.c index 41f7f078cd27c5..35e9956e930c18 100644 --- a/drivers/net/ethernet/broadcom/genet/bcmgenet.c +++ b/drivers/net/ethernet/broadcom/genet/bcmgenet.c @@ -1640,7 +1640,8 @@ static void bcmgenet_power_up(struct bcmgenet_priv *priv, switch (mode) { case GENET_POWER_PASSIVE: - reg &= ~(EXT_PWR_DOWN_DLL | EXT_PWR_DOWN_BIAS); + reg &= ~(EXT_PWR_DOWN_DLL | EXT_PWR_DOWN_BIAS | + EXT_ENERGY_DET_MASK); if (GENET_IS_V5(priv)) { reg &= ~(EXT_PWR_DOWN_PHY_EN | EXT_PWR_DOWN_PHY_RD | @@ -3292,7 +3293,6 @@ static int bcmgenet_open(struct net_device *dev) { struct bcmgenet_priv *priv = netdev_priv(dev); unsigned long dma_ctrl; - u32 reg; int ret; netif_dbg(priv, ifup, dev, "bcmgenet_open\n"); @@ -3318,12 +3318,6 @@ static int bcmgenet_open(struct net_device *dev) bcmgenet_set_hw_addr(priv, dev->dev_addr); - if (priv->internal_phy) { - reg = bcmgenet_ext_readl(priv, EXT_EXT_PWR_MGMT); - reg |= EXT_ENERGY_DET_MASK; - bcmgenet_ext_writel(priv, reg, EXT_EXT_PWR_MGMT); - } - /* Disable RX/TX DMA and flush TX queues */ dma_ctrl = bcmgenet_dma_disable(priv); @@ -4139,7 +4133,6 @@ static int bcmgenet_resume(struct device *d) struct bcmgenet_priv *priv = netdev_priv(dev); struct bcmgenet_rxnfc_rule *rule; unsigned long dma_ctrl; - u32 reg; int ret; if (!netif_running(dev)) @@ -4176,12 +4169,6 @@ static int bcmgenet_resume(struct device *d) if (rule->state != BCMGENET_RXNFC_STATE_UNUSED) bcmgenet_hfb_create_rxnfc_filter(priv, rule); - if (priv->internal_phy) { - reg = bcmgenet_ext_readl(priv, EXT_EXT_PWR_MGMT); - reg |= EXT_ENERGY_DET_MASK; - bcmgenet_ext_writel(priv, reg, EXT_EXT_PWR_MGMT); - } - /* Disable RX/TX DMA and flush TX queues */ dma_ctrl = bcmgenet_dma_disable(priv); diff --git a/drivers/net/ethernet/broadcom/genet/bcmgenet_wol.c b/drivers/net/ethernet/broadcom/genet/bcmgenet_wol.c index facde824bcaaba..e31a5a397f1141 100644 --- a/drivers/net/ethernet/broadcom/genet/bcmgenet_wol.c +++ b/drivers/net/ethernet/broadcom/genet/bcmgenet_wol.c @@ -186,12 +186,6 @@ int bcmgenet_wol_power_down_cfg(struct bcmgenet_priv *priv, reg |= CMD_RX_EN; bcmgenet_umac_writel(priv, reg, UMAC_CMD); - if (priv->hw_params->flags & GENET_HAS_EXT) { - reg = bcmgenet_ext_readl(priv, EXT_EXT_PWR_MGMT); - reg &= ~EXT_ENERGY_DET_MASK; - bcmgenet_ext_writel(priv, reg, EXT_EXT_PWR_MGMT); - } - reg = UMAC_IRQ_MPD_R; if (hfb_enable) reg |= UMAC_IRQ_HFB_SM | UMAC_IRQ_HFB_MM; From 650b2a846dddf33321489597287e6d825ec1d0ba Mon Sep 17 00:00:00 2001 From: Xin Long Date: Tue, 29 Jun 2021 23:30:36 -0400 Subject: [PATCH 048/714] sctp: check pl.raise_count separately from its increment As Marcelo's suggestion this will make code more clear to read. Signed-off-by: Xin Long Acked-by: Marcelo Ricardo Leitner Signed-off-by: David S. Miller --- net/sctp/transport.c | 11 +++++++---- 1 file changed, 7 insertions(+), 4 deletions(-) diff --git a/net/sctp/transport.c b/net/sctp/transport.c index 5f23804f21c7db..397a6244dd97c8 100644 --- a/net/sctp/transport.c +++ b/net/sctp/transport.c @@ -335,10 +335,13 @@ void sctp_transport_pl_recv(struct sctp_transport *t) t->pathmtu = t->pl.pmtu + sctp_transport_pl_hlen(t); sctp_assoc_sync_pmtu(t->asoc); } - } else if (t->pl.state == SCTP_PL_COMPLETE && ++t->pl.raise_count == 30) { - /* Raise probe_size again after 30 * interval in Search Complete */ - t->pl.state = SCTP_PL_SEARCH; /* Search Complete -> Search */ - t->pl.probe_size += SCTP_PL_MIN_STEP; + } else if (t->pl.state == SCTP_PL_COMPLETE) { + t->pl.raise_count++; + if (t->pl.raise_count == 30) { + /* Raise probe_size again after 30 * interval in Search Complete */ + t->pl.state = SCTP_PL_SEARCH; /* Search Complete -> Search */ + t->pl.probe_size += SCTP_PL_MIN_STEP; + } } } From 1d11fa231cabeae09a95cb3e4cf1d9dd34e00f08 Mon Sep 17 00:00:00 2001 From: Xin Long Date: Tue, 29 Jun 2021 23:34:08 -0400 Subject: [PATCH 049/714] sctp: move 198 addresses from unusable to private scope MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The doc draft-stewart-tsvwg-sctp-ipv4-00 that restricts 198 addresses was never published. These addresses as private addresses should be allowed to use in SCTP. As Michael Tuexen suggested, this patch is to move 198 addresses from unusable to private scope. Reported-by: Sérgio Signed-off-by: Xin Long Acked-by: Marcelo Ricardo Leitner Signed-off-by: David S. Miller --- include/net/sctp/constants.h | 4 +--- net/sctp/protocol.c | 3 ++- 2 files changed, 3 insertions(+), 4 deletions(-) diff --git a/include/net/sctp/constants.h b/include/net/sctp/constants.h index 265fffa33dad3d..5859e0a16a584a 100644 --- a/include/net/sctp/constants.h +++ b/include/net/sctp/constants.h @@ -360,8 +360,7 @@ enum { #define SCTP_SCOPE_POLICY_MAX SCTP_SCOPE_POLICY_LINK /* Based on IPv4 scoping , - * SCTP IPv4 unusable addresses: 0.0.0.0/8, 224.0.0.0/4, 198.18.0.0/24, - * 192.88.99.0/24. + * SCTP IPv4 unusable addresses: 0.0.0.0/8, 224.0.0.0/4, 192.88.99.0/24. * Also, RFC 8.4, non-unicast addresses are not considered valid SCTP * addresses. */ @@ -369,7 +368,6 @@ enum { ((htonl(INADDR_BROADCAST) == a) || \ ipv4_is_multicast(a) || \ ipv4_is_zeronet(a) || \ - ipv4_is_test_198(a) || \ ipv4_is_anycast_6to4(a)) /* Flags used for the bind address copy functions. */ diff --git a/net/sctp/protocol.c b/net/sctp/protocol.c index 3c1fbf38f4f7df..ec0f52567c16ff 100644 --- a/net/sctp/protocol.c +++ b/net/sctp/protocol.c @@ -398,7 +398,8 @@ static enum sctp_scope sctp_v4_scope(union sctp_addr *addr) retval = SCTP_SCOPE_LINK; } else if (ipv4_is_private_10(addr->v4.sin_addr.s_addr) || ipv4_is_private_172(addr->v4.sin_addr.s_addr) || - ipv4_is_private_192(addr->v4.sin_addr.s_addr)) { + ipv4_is_private_192(addr->v4.sin_addr.s_addr) || + ipv4_is_test_198(addr->v4.sin_addr.s_addr)) { retval = SCTP_SCOPE_PRIVATE; } else { retval = SCTP_SCOPE_GLOBAL; From 7da467d82d1ed4fb317aff836f99709169e73f10 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Marek=20Beh=C3=BAn?= Date: Thu, 1 Jul 2021 00:22:26 +0200 Subject: [PATCH 050/714] net: dsa: mv88e6xxx: enable .port_set_policy() on Topaz MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Commit f3a2cd326e44 ("net: dsa: mv88e6xxx: introduce .port_set_policy") introduced .port_set_policy() method with implementation for several models, but forgot to add Topaz, which can use the 6352 implementation. Use the 6352 implementation of .port_set_policy() on Topaz. Signed-off-by: Marek Behún Fixes: f3a2cd326e44 ("net: dsa: mv88e6xxx: introduce .port_set_policy") Reviewed-by: Andrew Lunn Signed-off-by: David S. Miller --- drivers/net/dsa/mv88e6xxx/chip.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/net/dsa/mv88e6xxx/chip.c b/drivers/net/dsa/mv88e6xxx/chip.c index 961fa6b75cad82..6bcee3e012d4ca 100644 --- a/drivers/net/dsa/mv88e6xxx/chip.c +++ b/drivers/net/dsa/mv88e6xxx/chip.c @@ -3583,6 +3583,7 @@ static const struct mv88e6xxx_ops mv88e6141_ops = { .port_set_speed_duplex = mv88e6341_port_set_speed_duplex, .port_max_speed_mode = mv88e6341_port_max_speed_mode, .port_tag_remap = mv88e6095_port_tag_remap, + .port_set_policy = mv88e6352_port_set_policy, .port_set_frame_mode = mv88e6351_port_set_frame_mode, .port_set_ucast_flood = mv88e6352_port_set_ucast_flood, .port_set_mcast_flood = mv88e6352_port_set_mcast_flood, @@ -4383,6 +4384,7 @@ static const struct mv88e6xxx_ops mv88e6341_ops = { .port_set_speed_duplex = mv88e6341_port_set_speed_duplex, .port_max_speed_mode = mv88e6341_port_max_speed_mode, .port_tag_remap = mv88e6095_port_tag_remap, + .port_set_policy = mv88e6352_port_set_policy, .port_set_frame_mode = mv88e6351_port_set_frame_mode, .port_set_ucast_flood = mv88e6352_port_set_ucast_flood, .port_set_mcast_flood = mv88e6352_port_set_mcast_flood, From 11527f3c4725640e6c40a2b7654e303f45e82a6c Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Marek=20Beh=C3=BAn?= Date: Thu, 1 Jul 2021 00:22:27 +0200 Subject: [PATCH 051/714] net: dsa: mv88e6xxx: use correct .stats_set_histogram() on Topaz MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Commit 40cff8fca9e3 ("net: dsa: mv88e6xxx: Fix stats histogram mode") introduced wrong .stats_set_histogram() method for Topaz family. The Peridot method should be used instead. Signed-off-by: Marek Behún Fixes: 40cff8fca9e3 ("net: dsa: mv88e6xxx: Fix stats histogram mode") Reviewed-by: Andrew Lunn Signed-off-by: David S. Miller --- drivers/net/dsa/mv88e6xxx/chip.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/net/dsa/mv88e6xxx/chip.c b/drivers/net/dsa/mv88e6xxx/chip.c index 6bcee3e012d4ca..b125d3227dbd05 100644 --- a/drivers/net/dsa/mv88e6xxx/chip.c +++ b/drivers/net/dsa/mv88e6xxx/chip.c @@ -3597,7 +3597,7 @@ static const struct mv88e6xxx_ops mv88e6141_ops = { .port_set_cmode = mv88e6341_port_set_cmode, .port_setup_message_port = mv88e6xxx_setup_message_port, .stats_snapshot = mv88e6390_g1_stats_snapshot, - .stats_set_histogram = mv88e6095_g1_stats_set_histogram, + .stats_set_histogram = mv88e6390_g1_stats_set_histogram, .stats_get_sset_count = mv88e6320_stats_get_sset_count, .stats_get_strings = mv88e6320_stats_get_strings, .stats_get_stats = mv88e6390_stats_get_stats, @@ -4398,7 +4398,7 @@ static const struct mv88e6xxx_ops mv88e6341_ops = { .port_set_cmode = mv88e6341_port_set_cmode, .port_setup_message_port = mv88e6xxx_setup_message_port, .stats_snapshot = mv88e6390_g1_stats_snapshot, - .stats_set_histogram = mv88e6095_g1_stats_set_histogram, + .stats_set_histogram = mv88e6390_g1_stats_set_histogram, .stats_get_sset_count = mv88e6320_stats_get_sset_count, .stats_get_strings = mv88e6320_stats_get_strings, .stats_get_stats = mv88e6390_stats_get_stats, From 3709488790022c85720f991bff50d48ed5a36e6a Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Marek=20Beh=C3=BAn?= Date: Thu, 1 Jul 2021 00:22:28 +0200 Subject: [PATCH 052/714] net: dsa: mv88e6xxx: enable .rmu_disable() on Topaz MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Commit 9e5baf9b36367 ("net: dsa: mv88e6xxx: add RMU disable op") introduced .rmu_disable() method with implementation for several models, but forgot to add Topaz, which can use the Peridot implementation. Use the Peridot implementation of .rmu_disable() on Topaz. Signed-off-by: Marek Behún Fixes: 9e5baf9b36367 ("net: dsa: mv88e6xxx: add RMU disable op") Reviewed-by: Andrew Lunn Signed-off-by: David S. Miller --- drivers/net/dsa/mv88e6xxx/chip.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/net/dsa/mv88e6xxx/chip.c b/drivers/net/dsa/mv88e6xxx/chip.c index b125d3227dbd05..d4b05c10e5f2d1 100644 --- a/drivers/net/dsa/mv88e6xxx/chip.c +++ b/drivers/net/dsa/mv88e6xxx/chip.c @@ -3607,6 +3607,7 @@ static const struct mv88e6xxx_ops mv88e6141_ops = { .mgmt_rsvd2cpu = mv88e6390_g1_mgmt_rsvd2cpu, .pot_clear = mv88e6xxx_g2_pot_clear, .reset = mv88e6352_g1_reset, + .rmu_disable = mv88e6390_g1_rmu_disable, .vtu_getnext = mv88e6352_g1_vtu_getnext, .vtu_loadpurge = mv88e6352_g1_vtu_loadpurge, .serdes_power = mv88e6390_serdes_power, @@ -4408,6 +4409,7 @@ static const struct mv88e6xxx_ops mv88e6341_ops = { .mgmt_rsvd2cpu = mv88e6390_g1_mgmt_rsvd2cpu, .pot_clear = mv88e6xxx_g2_pot_clear, .reset = mv88e6352_g1_reset, + .rmu_disable = mv88e6390_g1_rmu_disable, .vtu_getnext = mv88e6352_g1_vtu_getnext, .vtu_loadpurge = mv88e6352_g1_vtu_loadpurge, .serdes_power = mv88e6390_serdes_power, From c07fff3492acae41cedbabea395b644dd5872b8c Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Marek=20Beh=C3=BAn?= Date: Thu, 1 Jul 2021 00:22:29 +0200 Subject: [PATCH 053/714] net: dsa: mv88e6xxx: enable devlink ATU hash param for Topaz MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Commit 23e8b470c7788 ("net: dsa: mv88e6xxx: Add devlink param for ATU hash algorithm.") introduced ATU hash algorithm access via devlink, but did not enable it for Topaz. Enable this feature also for Topaz. Signed-off-by: Marek Behún Fixes: 23e8b470c7788 ("net: dsa: mv88e6xxx: Add devlink param for ATU hash algorithm.") Reviewed-by: Andrew Lunn Signed-off-by: David S. Miller --- drivers/net/dsa/mv88e6xxx/chip.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/drivers/net/dsa/mv88e6xxx/chip.c b/drivers/net/dsa/mv88e6xxx/chip.c index d4b05c10e5f2d1..354ff0b84b7f16 100644 --- a/drivers/net/dsa/mv88e6xxx/chip.c +++ b/drivers/net/dsa/mv88e6xxx/chip.c @@ -3608,6 +3608,8 @@ static const struct mv88e6xxx_ops mv88e6141_ops = { .pot_clear = mv88e6xxx_g2_pot_clear, .reset = mv88e6352_g1_reset, .rmu_disable = mv88e6390_g1_rmu_disable, + .atu_get_hash = mv88e6165_g1_atu_get_hash, + .atu_set_hash = mv88e6165_g1_atu_set_hash, .vtu_getnext = mv88e6352_g1_vtu_getnext, .vtu_loadpurge = mv88e6352_g1_vtu_loadpurge, .serdes_power = mv88e6390_serdes_power, @@ -4410,6 +4412,8 @@ static const struct mv88e6xxx_ops mv88e6341_ops = { .pot_clear = mv88e6xxx_g2_pot_clear, .reset = mv88e6352_g1_reset, .rmu_disable = mv88e6390_g1_rmu_disable, + .atu_get_hash = mv88e6165_g1_atu_get_hash, + .atu_set_hash = mv88e6165_g1_atu_set_hash, .vtu_getnext = mv88e6352_g1_vtu_getnext, .vtu_loadpurge = mv88e6352_g1_vtu_loadpurge, .serdes_power = mv88e6390_serdes_power, From a03b98d68367b18e5db6d6850e2cc18754fba94a Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Marek=20Beh=C3=BAn?= Date: Thu, 1 Jul 2021 00:22:30 +0200 Subject: [PATCH 054/714] net: dsa: mv88e6xxx: enable SerDes RX stats for Topaz MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Commit 0df952873636a ("mv88e6xxx: Add serdes Rx statistics") added support for RX statistics on SerDes ports for Peridot. This same implementation is also valid for Topaz, but was not enabled at the time. We need to use the generic .serdes_get_lane() method instead of the Peridot specific one in the stats methods so that on Topaz the proper one is used. Signed-off-by: Marek Behún Fixes: 0df952873636a ("mv88e6xxx: Add serdes Rx statistics") Signed-off-by: David S. Miller --- drivers/net/dsa/mv88e6xxx/chip.c | 6 ++++++ drivers/net/dsa/mv88e6xxx/serdes.c | 6 +++--- 2 files changed, 9 insertions(+), 3 deletions(-) diff --git a/drivers/net/dsa/mv88e6xxx/chip.c b/drivers/net/dsa/mv88e6xxx/chip.c index 354ff0b84b7f16..1e95a0facbd431 100644 --- a/drivers/net/dsa/mv88e6xxx/chip.c +++ b/drivers/net/dsa/mv88e6xxx/chip.c @@ -3623,6 +3623,9 @@ static const struct mv88e6xxx_ops mv88e6141_ops = { .serdes_irq_enable = mv88e6390_serdes_irq_enable, .serdes_irq_status = mv88e6390_serdes_irq_status, .gpio_ops = &mv88e6352_gpio_ops, + .serdes_get_sset_count = mv88e6390_serdes_get_sset_count, + .serdes_get_strings = mv88e6390_serdes_get_strings, + .serdes_get_stats = mv88e6390_serdes_get_stats, .phylink_validate = mv88e6341_phylink_validate, }; @@ -4429,6 +4432,9 @@ static const struct mv88e6xxx_ops mv88e6341_ops = { .gpio_ops = &mv88e6352_gpio_ops, .avb_ops = &mv88e6390_avb_ops, .ptp_ops = &mv88e6352_ptp_ops, + .serdes_get_sset_count = mv88e6390_serdes_get_sset_count, + .serdes_get_strings = mv88e6390_serdes_get_strings, + .serdes_get_stats = mv88e6390_serdes_get_stats, .phylink_validate = mv88e6341_phylink_validate, }; diff --git a/drivers/net/dsa/mv88e6xxx/serdes.c b/drivers/net/dsa/mv88e6xxx/serdes.c index e4fbef81bc52d3..b1d46dd8eaabcd 100644 --- a/drivers/net/dsa/mv88e6xxx/serdes.c +++ b/drivers/net/dsa/mv88e6xxx/serdes.c @@ -722,7 +722,7 @@ static struct mv88e6390_serdes_hw_stat mv88e6390_serdes_hw_stats[] = { int mv88e6390_serdes_get_sset_count(struct mv88e6xxx_chip *chip, int port) { - if (mv88e6390_serdes_get_lane(chip, port) < 0) + if (mv88e6xxx_serdes_get_lane(chip, port) < 0) return 0; return ARRAY_SIZE(mv88e6390_serdes_hw_stats); @@ -734,7 +734,7 @@ int mv88e6390_serdes_get_strings(struct mv88e6xxx_chip *chip, struct mv88e6390_serdes_hw_stat *stat; int i; - if (mv88e6390_serdes_get_lane(chip, port) < 0) + if (mv88e6xxx_serdes_get_lane(chip, port) < 0) return 0; for (i = 0; i < ARRAY_SIZE(mv88e6390_serdes_hw_stats); i++) { @@ -770,7 +770,7 @@ int mv88e6390_serdes_get_stats(struct mv88e6xxx_chip *chip, int port, int lane; int i; - lane = mv88e6390_serdes_get_lane(chip, port); + lane = mv88e6xxx_serdes_get_lane(chip, port); if (lane < 0) return 0; From 953b0dcbe2e3f7bee98cc3bca2ec82c8298e9c16 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Marek=20Beh=C3=BAn?= Date: Thu, 1 Jul 2021 00:22:31 +0200 Subject: [PATCH 055/714] net: dsa: mv88e6xxx: enable SerDes PCS register dump via ethtool -d on Topaz MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Commit bf3504cea7d7e ("net: dsa: mv88e6xxx: Add 6390 family PCS registers to ethtool -d") added support for dumping SerDes PCS registers via ethtool -d for Peridot. The same implementation is also valid for Topaz, but was not enabled at the time. Signed-off-by: Marek Behún Fixes: bf3504cea7d7e ("net: dsa: mv88e6xxx: Add 6390 family PCS registers to ethtool -d") Reviewed-by: Andrew Lunn Signed-off-by: David S. Miller --- drivers/net/dsa/mv88e6xxx/chip.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/drivers/net/dsa/mv88e6xxx/chip.c b/drivers/net/dsa/mv88e6xxx/chip.c index 1e95a0facbd431..beb41572d04ea2 100644 --- a/drivers/net/dsa/mv88e6xxx/chip.c +++ b/drivers/net/dsa/mv88e6xxx/chip.c @@ -3626,6 +3626,8 @@ static const struct mv88e6xxx_ops mv88e6141_ops = { .serdes_get_sset_count = mv88e6390_serdes_get_sset_count, .serdes_get_strings = mv88e6390_serdes_get_strings, .serdes_get_stats = mv88e6390_serdes_get_stats, + .serdes_get_regs_len = mv88e6390_serdes_get_regs_len, + .serdes_get_regs = mv88e6390_serdes_get_regs, .phylink_validate = mv88e6341_phylink_validate, }; @@ -4435,6 +4437,8 @@ static const struct mv88e6xxx_ops mv88e6341_ops = { .serdes_get_sset_count = mv88e6390_serdes_get_sset_count, .serdes_get_strings = mv88e6390_serdes_get_strings, .serdes_get_stats = mv88e6390_serdes_get_stats, + .serdes_get_regs_len = mv88e6390_serdes_get_regs_len, + .serdes_get_regs = mv88e6390_serdes_get_regs, .phylink_validate = mv88e6341_phylink_validate, }; From 6f14078e3ee59ccc5806f7bff0f25f94a6d3ff80 Mon Sep 17 00:00:00 2001 From: Sunil Kumar Kori Date: Wed, 30 Jun 2021 15:40:57 +0530 Subject: [PATCH 056/714] octeontx2-af: DMAC filter support in MAC block MAC block supports 32 dmac filters which are logically divided among all attached LMACS. For example MAC block0 having one LMAC then maximum supported filters are 32 where as MAC block1 having 4 enabled LMACS them maximum supported filteres are 8 for each LMAC. This patch adds mbox handlers to add/delete/update mac entry in DMAC filter table. Signed-off-by: Sunil Kumar Kori Signed-off-by: Hariprasad Kelam Signed-off-by: Sunil Kovvuri Goutham Signed-off-by: David S. Miller --- .../net/ethernet/marvell/octeontx2/af/cgx.c | 264 +++++++++++++++++- .../net/ethernet/marvell/octeontx2/af/cgx.h | 7 + .../marvell/octeontx2/af/lmac_common.h | 12 +- .../net/ethernet/marvell/octeontx2/af/mbox.h | 48 +++- .../net/ethernet/marvell/octeontx2/af/rvu.h | 2 + .../ethernet/marvell/octeontx2/af/rvu_cgx.c | 109 ++++++++ .../ethernet/marvell/octeontx2/af/rvu_nix.c | 3 + 7 files changed, 425 insertions(+), 20 deletions(-) diff --git a/drivers/net/ethernet/marvell/octeontx2/af/cgx.c b/drivers/net/ethernet/marvell/octeontx2/af/cgx.c index fac6474ad694dd..bc413f96b43035 100644 --- a/drivers/net/ethernet/marvell/octeontx2/af/cgx.c +++ b/drivers/net/ethernet/marvell/octeontx2/af/cgx.c @@ -86,6 +86,22 @@ bool is_lmac_valid(struct cgx *cgx, int lmac_id) return test_bit(lmac_id, &cgx->lmac_bmap); } +/* Helper function to get sequential index + * given the enabled LMAC of a CGX + */ +static int get_sequence_id_of_lmac(struct cgx *cgx, int lmac_id) +{ + int tmp, id = 0; + + for_each_set_bit(tmp, &cgx->lmac_bmap, MAX_LMAC_PER_CGX) { + if (tmp == lmac_id) + break; + id++; + } + + return id; +} + struct mac_ops *get_mac_ops(void *cgxd) { if (!cgxd) @@ -211,37 +227,229 @@ static u64 mac2u64 (u8 *mac_addr) return mac; } +static void cfg2mac(u64 cfg, u8 *mac_addr) +{ + int i, index = 0; + + for (i = ETH_ALEN - 1; i >= 0; i--, index++) + mac_addr[i] = (cfg >> (8 * index)) & 0xFF; +} + int cgx_lmac_addr_set(u8 cgx_id, u8 lmac_id, u8 *mac_addr) { struct cgx *cgx_dev = cgx_get_pdata(cgx_id); + struct lmac *lmac = lmac_pdata(lmac_id, cgx_dev); struct mac_ops *mac_ops; + int index, id; u64 cfg; + /* access mac_ops to know csr_offset */ mac_ops = cgx_dev->mac_ops; + /* copy 6bytes from macaddr */ /* memcpy(&cfg, mac_addr, 6); */ cfg = mac2u64 (mac_addr); - cgx_write(cgx_dev, 0, (CGXX_CMRX_RX_DMAC_CAM0 + (lmac_id * 0x8)), + id = get_sequence_id_of_lmac(cgx_dev, lmac_id); + + index = id * lmac->mac_to_index_bmap.max; + + cgx_write(cgx_dev, 0, (CGXX_CMRX_RX_DMAC_CAM0 + (index * 0x8)), cfg | CGX_DMAC_CAM_ADDR_ENABLE | ((u64)lmac_id << 49)); cfg = cgx_read(cgx_dev, lmac_id, CGXX_CMRX_RX_DMAC_CTL0); - cfg |= CGX_DMAC_CTL0_CAM_ENABLE; + cfg |= (CGX_DMAC_CTL0_CAM_ENABLE | CGX_DMAC_BCAST_MODE | + CGX_DMAC_MCAST_MODE); + cgx_write(cgx_dev, lmac_id, CGXX_CMRX_RX_DMAC_CTL0, cfg); + + return 0; +} + +int cgx_lmac_addr_add(u8 cgx_id, u8 lmac_id, u8 *mac_addr) +{ + struct cgx *cgx_dev = cgx_get_pdata(cgx_id); + struct lmac *lmac = lmac_pdata(lmac_id, cgx_dev); + struct mac_ops *mac_ops; + int index, idx; + u64 cfg = 0; + int id; + + if (!lmac) + return -ENODEV; + + mac_ops = cgx_dev->mac_ops; + /* Get available index where entry is to be installed */ + idx = rvu_alloc_rsrc(&lmac->mac_to_index_bmap); + if (idx < 0) + return idx; + + id = get_sequence_id_of_lmac(cgx_dev, lmac_id); + + index = id * lmac->mac_to_index_bmap.max + idx; + + cfg = mac2u64 (mac_addr); + cfg |= CGX_DMAC_CAM_ADDR_ENABLE; + cfg |= ((u64)lmac_id << 49); + cgx_write(cgx_dev, 0, (CGXX_CMRX_RX_DMAC_CAM0 + (index * 0x8)), cfg); + + cfg = cgx_read(cgx_dev, lmac_id, CGXX_CMRX_RX_DMAC_CTL0); + cfg |= (CGX_DMAC_BCAST_MODE | CGX_DMAC_CAM_ACCEPT); + + if (is_multicast_ether_addr(mac_addr)) { + cfg &= ~GENMASK_ULL(2, 1); + cfg |= CGX_DMAC_MCAST_MODE_CAM; + lmac->mcast_filters_count++; + } else if (!lmac->mcast_filters_count) { + cfg |= CGX_DMAC_MCAST_MODE; + } + + cgx_write(cgx_dev, lmac_id, CGXX_CMRX_RX_DMAC_CTL0, cfg); + + return idx; +} + +int cgx_lmac_addr_reset(u8 cgx_id, u8 lmac_id) +{ + struct cgx *cgx_dev = cgx_get_pdata(cgx_id); + struct lmac *lmac = lmac_pdata(lmac_id, cgx_dev); + struct mac_ops *mac_ops; + u8 index = 0, id; + u64 cfg; + + if (!lmac) + return -ENODEV; + + mac_ops = cgx_dev->mac_ops; + /* Restore index 0 to its default init value as done during + * cgx_lmac_init + */ + set_bit(0, lmac->mac_to_index_bmap.bmap); + + id = get_sequence_id_of_lmac(cgx_dev, lmac_id); + + index = id * lmac->mac_to_index_bmap.max + index; + cgx_write(cgx_dev, 0, (CGXX_CMRX_RX_DMAC_CAM0 + (index * 0x8)), 0); + + /* Reset CGXX_CMRX_RX_DMAC_CTL0 register to default state */ + cfg = cgx_read(cgx_dev, lmac_id, CGXX_CMRX_RX_DMAC_CTL0); + cfg &= ~CGX_DMAC_CAM_ACCEPT; + cfg |= (CGX_DMAC_BCAST_MODE | CGX_DMAC_MCAST_MODE); cgx_write(cgx_dev, lmac_id, CGXX_CMRX_RX_DMAC_CTL0, cfg); return 0; } +/* Allows caller to change macaddress associated with index + * in dmac filter table including index 0 reserved for + * interface mac address + */ +int cgx_lmac_addr_update(u8 cgx_id, u8 lmac_id, u8 *mac_addr, u8 index) +{ + struct cgx *cgx_dev = cgx_get_pdata(cgx_id); + struct mac_ops *mac_ops; + struct lmac *lmac; + u64 cfg; + int id; + + lmac = lmac_pdata(lmac_id, cgx_dev); + if (!lmac) + return -ENODEV; + + mac_ops = cgx_dev->mac_ops; + /* Validate the index */ + if (index >= lmac->mac_to_index_bmap.max) + return -EINVAL; + + /* ensure index is already set */ + if (!test_bit(index, lmac->mac_to_index_bmap.bmap)) + return -EINVAL; + + id = get_sequence_id_of_lmac(cgx_dev, lmac_id); + + index = id * lmac->mac_to_index_bmap.max + index; + + cfg = cgx_read(cgx_dev, 0, (CGXX_CMRX_RX_DMAC_CAM0 + (index * 0x8))); + cfg &= ~CGX_RX_DMAC_ADR_MASK; + cfg |= mac2u64 (mac_addr); + + cgx_write(cgx_dev, 0, (CGXX_CMRX_RX_DMAC_CAM0 + (index * 0x8)), cfg); + return 0; +} + +int cgx_lmac_addr_del(u8 cgx_id, u8 lmac_id, u8 index) +{ + struct cgx *cgx_dev = cgx_get_pdata(cgx_id); + struct lmac *lmac = lmac_pdata(lmac_id, cgx_dev); + struct mac_ops *mac_ops; + u8 mac[ETH_ALEN]; + u64 cfg; + int id; + + if (!lmac) + return -ENODEV; + + mac_ops = cgx_dev->mac_ops; + /* Validate the index */ + if (index >= lmac->mac_to_index_bmap.max) + return -EINVAL; + + /* Skip deletion for reserved index i.e. index 0 */ + if (index == 0) + return 0; + + rvu_free_rsrc(&lmac->mac_to_index_bmap, index); + + id = get_sequence_id_of_lmac(cgx_dev, lmac_id); + + index = id * lmac->mac_to_index_bmap.max + index; + + /* Read MAC address to check whether it is ucast or mcast */ + cfg = cgx_read(cgx_dev, 0, (CGXX_CMRX_RX_DMAC_CAM0 + (index * 0x8))); + + cfg2mac(cfg, mac); + if (is_multicast_ether_addr(mac)) + lmac->mcast_filters_count--; + + if (!lmac->mcast_filters_count) { + cfg = cgx_read(cgx_dev, lmac_id, CGXX_CMRX_RX_DMAC_CTL0); + cfg &= ~GENMASK_ULL(2, 1); + cfg |= CGX_DMAC_MCAST_MODE; + cgx_write(cgx_dev, lmac_id, CGXX_CMRX_RX_DMAC_CTL0, cfg); + } + + cgx_write(cgx_dev, 0, (CGXX_CMRX_RX_DMAC_CAM0 + (index * 0x8)), 0); + + return 0; +} + +int cgx_lmac_addr_max_entries_get(u8 cgx_id, u8 lmac_id) +{ + struct cgx *cgx_dev = cgx_get_pdata(cgx_id); + struct lmac *lmac = lmac_pdata(lmac_id, cgx_dev); + + if (lmac) + return lmac->mac_to_index_bmap.max; + + return 0; +} + u64 cgx_lmac_addr_get(u8 cgx_id, u8 lmac_id) { struct cgx *cgx_dev = cgx_get_pdata(cgx_id); + struct lmac *lmac = lmac_pdata(lmac_id, cgx_dev); struct mac_ops *mac_ops; + int index; u64 cfg; + int id; mac_ops = cgx_dev->mac_ops; - cfg = cgx_read(cgx_dev, 0, CGXX_CMRX_RX_DMAC_CAM0 + lmac_id * 0x8); + id = get_sequence_id_of_lmac(cgx_dev, lmac_id); + + index = id * lmac->mac_to_index_bmap.max; + + cfg = cgx_read(cgx_dev, 0, CGXX_CMRX_RX_DMAC_CAM0 + index * 0x8); return cfg & CGX_RX_DMAC_ADR_MASK; } @@ -297,35 +505,51 @@ int cgx_lmac_internal_loopback(void *cgxd, int lmac_id, bool enable) void cgx_lmac_promisc_config(int cgx_id, int lmac_id, bool enable) { struct cgx *cgx = cgx_get_pdata(cgx_id); + struct lmac *lmac = lmac_pdata(lmac_id, cgx); + u16 max_dmac = lmac->mac_to_index_bmap.max; struct mac_ops *mac_ops; + int index, i; u64 cfg = 0; + int id; if (!cgx) return; + id = get_sequence_id_of_lmac(cgx, lmac_id); + mac_ops = cgx->mac_ops; if (enable) { /* Enable promiscuous mode on LMAC */ cfg = cgx_read(cgx, lmac_id, CGXX_CMRX_RX_DMAC_CTL0); - cfg &= ~(CGX_DMAC_CAM_ACCEPT | CGX_DMAC_MCAST_MODE); - cfg |= CGX_DMAC_BCAST_MODE; + cfg &= ~CGX_DMAC_CAM_ACCEPT; + cfg |= (CGX_DMAC_BCAST_MODE | CGX_DMAC_MCAST_MODE); cgx_write(cgx, lmac_id, CGXX_CMRX_RX_DMAC_CTL0, cfg); - cfg = cgx_read(cgx, 0, - (CGXX_CMRX_RX_DMAC_CAM0 + lmac_id * 0x8)); - cfg &= ~CGX_DMAC_CAM_ADDR_ENABLE; - cgx_write(cgx, 0, - (CGXX_CMRX_RX_DMAC_CAM0 + lmac_id * 0x8), cfg); + for (i = 0; i < max_dmac; i++) { + index = id * max_dmac + i; + cfg = cgx_read(cgx, 0, + (CGXX_CMRX_RX_DMAC_CAM0 + index * 0x8)); + cfg &= ~CGX_DMAC_CAM_ADDR_ENABLE; + cgx_write(cgx, 0, + (CGXX_CMRX_RX_DMAC_CAM0 + index * 0x8), cfg); + } } else { /* Disable promiscuous mode */ cfg = cgx_read(cgx, lmac_id, CGXX_CMRX_RX_DMAC_CTL0); cfg |= CGX_DMAC_CAM_ACCEPT | CGX_DMAC_MCAST_MODE; cgx_write(cgx, lmac_id, CGXX_CMRX_RX_DMAC_CTL0, cfg); - cfg = cgx_read(cgx, 0, - (CGXX_CMRX_RX_DMAC_CAM0 + lmac_id * 0x8)); - cfg |= CGX_DMAC_CAM_ADDR_ENABLE; - cgx_write(cgx, 0, - (CGXX_CMRX_RX_DMAC_CAM0 + lmac_id * 0x8), cfg); + for (i = 0; i < max_dmac; i++) { + index = id * max_dmac + i; + cfg = cgx_read(cgx, 0, + (CGXX_CMRX_RX_DMAC_CAM0 + index * 0x8)); + if ((cfg & CGX_RX_DMAC_ADR_MASK) != 0) { + cfg |= CGX_DMAC_CAM_ADDR_ENABLE; + cgx_write(cgx, 0, + (CGXX_CMRX_RX_DMAC_CAM0 + + index * 0x8), + cfg); + } + } } } @@ -1234,6 +1458,15 @@ static int cgx_lmac_init(struct cgx *cgx) } lmac->cgx = cgx; + lmac->mac_to_index_bmap.max = + MAX_DMAC_ENTRIES_PER_CGX / cgx->lmac_count; + err = rvu_alloc_bitmap(&lmac->mac_to_index_bmap); + if (err) + return err; + + /* Reserve first entry for default MAC address */ + set_bit(0, lmac->mac_to_index_bmap.bmap); + init_waitqueue_head(&lmac->wq_cmd_cmplt); mutex_init(&lmac->cmd_lock); spin_lock_init(&lmac->event_cb_lock); @@ -1274,6 +1507,7 @@ static int cgx_lmac_exit(struct cgx *cgx) continue; cgx->mac_ops->mac_pause_frm_config(cgx, lmac->lmac_id, false); cgx_configure_interrupt(cgx, lmac, lmac->lmac_id, true); + kfree(lmac->mac_to_index_bmap.bmap); kfree(lmac->name); kfree(lmac); } diff --git a/drivers/net/ethernet/marvell/octeontx2/af/cgx.h b/drivers/net/ethernet/marvell/octeontx2/af/cgx.h index 12521262164af5..0c613f83a41c22 100644 --- a/drivers/net/ethernet/marvell/octeontx2/af/cgx.h +++ b/drivers/net/ethernet/marvell/octeontx2/af/cgx.h @@ -23,6 +23,7 @@ #define CGX_ID_MASK 0x7 #define MAX_LMAC_PER_CGX 4 +#define MAX_DMAC_ENTRIES_PER_CGX 32 #define CGX_FIFO_LEN 65536 /* 64K for both Rx & Tx */ #define CGX_OFFSET(x) ((x) * MAX_LMAC_PER_CGX) @@ -46,6 +47,7 @@ #define CGXX_CMRX_RX_DMAC_CTL0 (0x1F8 + mac_ops->csr_offset) #define CGX_DMAC_CTL0_CAM_ENABLE BIT_ULL(3) #define CGX_DMAC_CAM_ACCEPT BIT_ULL(3) +#define CGX_DMAC_MCAST_MODE_CAM BIT_ULL(2) #define CGX_DMAC_MCAST_MODE BIT_ULL(1) #define CGX_DMAC_BCAST_MODE BIT_ULL(0) #define CGXX_CMRX_RX_DMAC_CAM0 (0x200 + mac_ops->csr_offset) @@ -139,7 +141,11 @@ int cgx_get_rx_stats(void *cgxd, int lmac_id, int idx, u64 *rx_stat); int cgx_lmac_rx_tx_enable(void *cgxd, int lmac_id, bool enable); int cgx_lmac_tx_enable(void *cgxd, int lmac_id, bool enable); int cgx_lmac_addr_set(u8 cgx_id, u8 lmac_id, u8 *mac_addr); +int cgx_lmac_addr_reset(u8 cgx_id, u8 lmac_id); u64 cgx_lmac_addr_get(u8 cgx_id, u8 lmac_id); +int cgx_lmac_addr_add(u8 cgx_id, u8 lmac_id, u8 *mac_addr); +int cgx_lmac_addr_del(u8 cgx_id, u8 lmac_id, u8 index); +int cgx_lmac_addr_max_entries_get(u8 cgx_id, u8 lmac_id); void cgx_lmac_promisc_config(int cgx_id, int lmac_id, bool enable); void cgx_lmac_enadis_rx_pause_fwding(void *cgxd, int lmac_id, bool enable); int cgx_lmac_internal_loopback(void *cgxd, int lmac_id, bool enable); @@ -165,4 +171,5 @@ u8 cgx_get_lmacid(void *cgxd, u8 lmac_index); unsigned long cgx_get_lmac_bmap(void *cgxd); void cgx_lmac_write(int cgx_id, int lmac_id, u64 offset, u64 val); u64 cgx_lmac_read(int cgx_id, int lmac_id, u64 offset); +int cgx_lmac_addr_update(u8 cgx_id, u8 lmac_id, u8 *mac_addr, u8 index); #endif /* CGX_H */ diff --git a/drivers/net/ethernet/marvell/octeontx2/af/lmac_common.h b/drivers/net/ethernet/marvell/octeontx2/af/lmac_common.h index 45706fd87120f0..a8b7b1c7a1d581 100644 --- a/drivers/net/ethernet/marvell/octeontx2/af/lmac_common.h +++ b/drivers/net/ethernet/marvell/octeontx2/af/lmac_common.h @@ -10,17 +10,19 @@ #include "rvu.h" #include "cgx.h" /** - * struct lmac + * struct lmac - per lmac locks and properties * @wq_cmd_cmplt: waitq to keep the process blocked until cmd completion * @cmd_lock: Lock to serialize the command interface * @resp: command response * @link_info: link related information + * @mac_to_index_bmap: Mac address to CGX table index mapping * @event_cb: callback for linkchange events * @event_cb_lock: lock for serializing callback with unregister - * @cmd_pend: flag set before new command is started - * flag cleared after command response is received * @cgx: parent cgx port + * @mcast_filters_count: Number of multicast filters installed * @lmac_id: lmac port id + * @cmd_pend: flag set before new command is started + * flag cleared after command response is received * @name: lmac port name */ struct lmac { @@ -29,12 +31,14 @@ struct lmac { struct mutex cmd_lock; u64 resp; struct cgx_link_user_info link_info; + struct rsrc_bmap mac_to_index_bmap; struct cgx_event_cb event_cb; /* lock for serializing callback with unregister */ spinlock_t event_cb_lock; - bool cmd_pend; struct cgx *cgx; + u8 mcast_filters_count; u8 lmac_id; + bool cmd_pend; char *name; }; diff --git a/drivers/net/ethernet/marvell/octeontx2/af/mbox.h b/drivers/net/ethernet/marvell/octeontx2/af/mbox.h index 9672cbf8a90a69..f5ec39de026a56 100644 --- a/drivers/net/ethernet/marvell/octeontx2/af/mbox.h +++ b/drivers/net/ethernet/marvell/octeontx2/af/mbox.h @@ -165,7 +165,15 @@ M(CGX_SET_LINK_MODE, 0x214, cgx_set_link_mode, cgx_set_link_mode_req,\ M(CGX_FEATURES_GET, 0x215, cgx_features_get, msg_req, \ cgx_features_info_msg) \ M(RPM_STATS, 0x216, rpm_stats, msg_req, rpm_stats_rsp) \ - /* NPA mbox IDs (range 0x400 - 0x5FF) */ \ +M(CGX_MAC_ADDR_ADD, 0x217, cgx_mac_addr_add, cgx_mac_addr_add_req, \ + cgx_mac_addr_add_rsp) \ +M(CGX_MAC_ADDR_DEL, 0x218, cgx_mac_addr_del, cgx_mac_addr_del_req, \ + msg_rsp) \ +M(CGX_MAC_MAX_ENTRIES_GET, 0x219, cgx_mac_max_entries_get, msg_req, \ + cgx_max_dmac_entries_get_rsp) \ +M(CGX_MAC_ADDR_RESET, 0x21A, cgx_mac_addr_reset, msg_req, msg_rsp) \ +M(CGX_MAC_ADDR_UPDATE, 0x21B, cgx_mac_addr_update, cgx_mac_addr_update_req, \ + msg_rsp) \ /* NPA mbox IDs (range 0x400 - 0x5FF) */ \ M(NPA_LF_ALLOC, 0x400, npa_lf_alloc, \ npa_lf_alloc_req, npa_lf_alloc_rsp) \ @@ -403,6 +411,38 @@ struct cgx_mac_addr_set_or_get { u8 mac_addr[ETH_ALEN]; }; +/* Structure for requesting the operation to + * add DMAC filter entry into CGX interface + */ +struct cgx_mac_addr_add_req { + struct mbox_msghdr hdr; + u8 mac_addr[ETH_ALEN]; +}; + +/* Structure for response against the operation to + * add DMAC filter entry into CGX interface + */ +struct cgx_mac_addr_add_rsp { + struct mbox_msghdr hdr; + u8 index; +}; + +/* Structure for requesting the operation to + * delete DMAC filter entry from CGX interface + */ +struct cgx_mac_addr_del_req { + struct mbox_msghdr hdr; + u8 index; +}; + +/* Structure for response against the operation to + * get maximum supported DMAC filter entries + */ +struct cgx_max_dmac_entries_get_rsp { + struct mbox_msghdr hdr; + u8 max_dmac_filters; +}; + struct cgx_link_user_info { uint64_t link_up:1; uint64_t full_duplex:1; @@ -501,6 +541,12 @@ struct cgx_set_link_mode_rsp { int status; }; +struct cgx_mac_addr_update_req { + struct mbox_msghdr hdr; + u8 mac_addr[ETH_ALEN]; + u8 index; +}; + #define RVU_LMAC_FEAT_FC BIT_ULL(0) /* pause frames */ #define RVU_LMAC_FEAT_PTP BIT_ULL(1) /* precision time protocol */ #define RVU_MAC_VERSION BIT_ULL(2) diff --git a/drivers/net/ethernet/marvell/octeontx2/af/rvu.h b/drivers/net/ethernet/marvell/octeontx2/af/rvu.h index 3c0a7e981f72b5..bc0d245070338e 100644 --- a/drivers/net/ethernet/marvell/octeontx2/af/rvu.h +++ b/drivers/net/ethernet/marvell/octeontx2/af/rvu.h @@ -657,6 +657,8 @@ void rvu_cgx_enadis_rx_bp(struct rvu *rvu, int pf, bool enable); int rvu_cgx_start_stop_io(struct rvu *rvu, u16 pcifunc, bool start); int rvu_cgx_nix_cuml_stats(struct rvu *rvu, void *cgxd, int lmac_id, int index, int rxtxflag, u64 *stat); +void rvu_cgx_disable_dmac_entries(struct rvu *rvu, u16 pcifunc); + /* NPA APIs */ int rvu_npa_init(struct rvu *rvu); void rvu_npa_freemem(struct rvu *rvu); diff --git a/drivers/net/ethernet/marvell/octeontx2/af/rvu_cgx.c b/drivers/net/ethernet/marvell/octeontx2/af/rvu_cgx.c index 6e2bf4fcd29cfc..9c6f4ba2d7268b 100644 --- a/drivers/net/ethernet/marvell/octeontx2/af/rvu_cgx.c +++ b/drivers/net/ethernet/marvell/octeontx2/af/rvu_cgx.c @@ -454,6 +454,31 @@ int rvu_cgx_config_rxtx(struct rvu *rvu, u16 pcifunc, bool start) return 0; } +void rvu_cgx_disable_dmac_entries(struct rvu *rvu, u16 pcifunc) +{ + int pf = rvu_get_pf(pcifunc); + int i = 0, lmac_count = 0; + u8 max_dmac_filters; + u8 cgx_id, lmac_id; + void *cgx_dev; + + if (!is_cgx_config_permitted(rvu, pcifunc)) + return; + + rvu_get_cgx_lmac_id(rvu->pf2cgxlmac_map[pf], &cgx_id, &lmac_id); + cgx_dev = cgx_get_pdata(cgx_id); + lmac_count = cgx_get_lmac_cnt(cgx_dev); + max_dmac_filters = MAX_DMAC_ENTRIES_PER_CGX / lmac_count; + + for (i = 0; i < max_dmac_filters; i++) + cgx_lmac_addr_del(cgx_id, lmac_id, i); + + /* As cgx_lmac_addr_del does not clear entry for index 0 + * so it needs to be done explicitly + */ + cgx_lmac_addr_reset(cgx_id, lmac_id); +} + int rvu_mbox_handler_cgx_start_rxtx(struct rvu *rvu, struct msg_req *req, struct msg_rsp *rsp) { @@ -557,6 +582,63 @@ int rvu_mbox_handler_cgx_mac_addr_set(struct rvu *rvu, return 0; } +int rvu_mbox_handler_cgx_mac_addr_add(struct rvu *rvu, + struct cgx_mac_addr_add_req *req, + struct cgx_mac_addr_add_rsp *rsp) +{ + int pf = rvu_get_pf(req->hdr.pcifunc); + u8 cgx_id, lmac_id; + int rc = 0; + + if (!is_cgx_config_permitted(rvu, req->hdr.pcifunc)) + return -EPERM; + + rvu_get_cgx_lmac_id(rvu->pf2cgxlmac_map[pf], &cgx_id, &lmac_id); + rc = cgx_lmac_addr_add(cgx_id, lmac_id, req->mac_addr); + if (rc >= 0) { + rsp->index = rc; + return 0; + } + + return rc; +} + +int rvu_mbox_handler_cgx_mac_addr_del(struct rvu *rvu, + struct cgx_mac_addr_del_req *req, + struct msg_rsp *rsp) +{ + int pf = rvu_get_pf(req->hdr.pcifunc); + u8 cgx_id, lmac_id; + + if (!is_cgx_config_permitted(rvu, req->hdr.pcifunc)) + return -EPERM; + + rvu_get_cgx_lmac_id(rvu->pf2cgxlmac_map[pf], &cgx_id, &lmac_id); + return cgx_lmac_addr_del(cgx_id, lmac_id, req->index); +} + +int rvu_mbox_handler_cgx_mac_max_entries_get(struct rvu *rvu, + struct msg_req *req, + struct cgx_max_dmac_entries_get_rsp + *rsp) +{ + int pf = rvu_get_pf(req->hdr.pcifunc); + u8 cgx_id, lmac_id; + + /* If msg is received from PFs(which are not mapped to CGX LMACs) + * or VF then no entries are allocated for DMAC filters at CGX level. + * So returning zero. + */ + if (!is_cgx_config_permitted(rvu, req->hdr.pcifunc)) { + rsp->max_dmac_filters = 0; + return 0; + } + + rvu_get_cgx_lmac_id(rvu->pf2cgxlmac_map[pf], &cgx_id, &lmac_id); + rsp->max_dmac_filters = cgx_lmac_addr_max_entries_get(cgx_id, lmac_id); + return 0; +} + int rvu_mbox_handler_cgx_mac_addr_get(struct rvu *rvu, struct cgx_mac_addr_set_or_get *req, struct cgx_mac_addr_set_or_get *rsp) @@ -953,3 +1035,30 @@ int rvu_mbox_handler_cgx_set_link_mode(struct rvu *rvu, rsp->status = cgx_set_link_mode(cgxd, req->args, cgx_idx, lmac); return 0; } + +int rvu_mbox_handler_cgx_mac_addr_reset(struct rvu *rvu, struct msg_req *req, + struct msg_rsp *rsp) +{ + int pf = rvu_get_pf(req->hdr.pcifunc); + u8 cgx_id, lmac_id; + + if (!is_cgx_config_permitted(rvu, req->hdr.pcifunc)) + return -EPERM; + + rvu_get_cgx_lmac_id(rvu->pf2cgxlmac_map[pf], &cgx_id, &lmac_id); + return cgx_lmac_addr_reset(cgx_id, lmac_id); +} + +int rvu_mbox_handler_cgx_mac_addr_update(struct rvu *rvu, + struct cgx_mac_addr_update_req *req, + struct msg_rsp *rsp) +{ + int pf = rvu_get_pf(req->hdr.pcifunc); + u8 cgx_id, lmac_id; + + if (!is_cgx_config_permitted(rvu, req->hdr.pcifunc)) + return -EPERM; + + rvu_get_cgx_lmac_id(rvu->pf2cgxlmac_map[pf], &cgx_id, &lmac_id); + return cgx_lmac_addr_update(cgx_id, lmac_id, req->mac_addr, req->index); +} diff --git a/drivers/net/ethernet/marvell/octeontx2/af/rvu_nix.c b/drivers/net/ethernet/marvell/octeontx2/af/rvu_nix.c index d6f8210652c5c9..aeae3770442807 100644 --- a/drivers/net/ethernet/marvell/octeontx2/af/rvu_nix.c +++ b/drivers/net/ethernet/marvell/octeontx2/af/rvu_nix.c @@ -346,6 +346,9 @@ static void nix_interface_deinit(struct rvu *rvu, u16 pcifunc, u8 nixlf) /* Free and disable any MCAM entries used by this NIX LF */ rvu_npc_disable_mcam_entries(rvu, pcifunc, nixlf); + + /* Disable DMAC filters used */ + rvu_cgx_disable_dmac_entries(rvu, pcifunc); } int rvu_mbox_handler_nix_bp_disable(struct rvu *rvu, From dbc52debf95f8f902f74309e7ae0de38e1ed4e4c Mon Sep 17 00:00:00 2001 From: Hariprasad Kelam Date: Wed, 30 Jun 2021 15:40:58 +0530 Subject: [PATCH 057/714] octeontx2-af: Debugfs support for DMAC filters Add debugfs support to display CGX/RPM DMAC filter table associated with pf. cat /sys/kernel/debug/octeontx2/cgx/cgx0/lmac0/mac_filter PCI dev RVUPF BROADCAST MULTICAST FILTER-MODE 0002:02:00.0 PF2 ACCEPT ACCEPT UNICAST DMAC-INDEX ADDRESS 0 00:0f:b7:06:17:06 1 1a:1b:1c:1d:1e:01 2 1a:1b:1c:1d:1e:02 Signed-off-by: Hariprasad Kelam Signed-off-by: Sunil Kovvuri Goutham Signed-off-by: David S. Miller --- .../net/ethernet/marvell/octeontx2/af/cgx.c | 28 ++++++ .../net/ethernet/marvell/octeontx2/af/cgx.h | 3 + .../net/ethernet/marvell/octeontx2/af/rvu.h | 1 + .../ethernet/marvell/octeontx2/af/rvu_cgx.c | 2 +- .../marvell/octeontx2/af/rvu_debugfs.c | 88 +++++++++++++++++-- 5 files changed, 113 insertions(+), 9 deletions(-) diff --git a/drivers/net/ethernet/marvell/octeontx2/af/cgx.c b/drivers/net/ethernet/marvell/octeontx2/af/cgx.c index bc413f96b43035..9169849881bff6 100644 --- a/drivers/net/ethernet/marvell/octeontx2/af/cgx.c +++ b/drivers/net/ethernet/marvell/octeontx2/af/cgx.c @@ -266,6 +266,34 @@ int cgx_lmac_addr_set(u8 cgx_id, u8 lmac_id, u8 *mac_addr) return 0; } +u64 cgx_read_dmac_ctrl(void *cgxd, int lmac_id) +{ + struct mac_ops *mac_ops; + struct cgx *cgx = cgxd; + + if (!cgxd || !is_lmac_valid(cgxd, lmac_id)) + return 0; + + cgx = cgxd; + /* Get mac_ops to know csr offset */ + mac_ops = cgx->mac_ops; + + return cgx_read(cgxd, lmac_id, CGXX_CMRX_RX_DMAC_CTL0); +} + +u64 cgx_read_dmac_entry(void *cgxd, int index) +{ + struct mac_ops *mac_ops; + struct cgx *cgx; + + if (!cgxd) + return 0; + + cgx = cgxd; + mac_ops = cgx->mac_ops; + return cgx_read(cgx, 0, (CGXX_CMRX_RX_DMAC_CAM0 + (index * 8))); +} + int cgx_lmac_addr_add(u8 cgx_id, u8 lmac_id, u8 *mac_addr) { struct cgx *cgx_dev = cgx_get_pdata(cgx_id); diff --git a/drivers/net/ethernet/marvell/octeontx2/af/cgx.h b/drivers/net/ethernet/marvell/octeontx2/af/cgx.h index 0c613f83a41c22..237ba2b56210bb 100644 --- a/drivers/net/ethernet/marvell/octeontx2/af/cgx.h +++ b/drivers/net/ethernet/marvell/octeontx2/af/cgx.h @@ -52,6 +52,7 @@ #define CGX_DMAC_BCAST_MODE BIT_ULL(0) #define CGXX_CMRX_RX_DMAC_CAM0 (0x200 + mac_ops->csr_offset) #define CGX_DMAC_CAM_ADDR_ENABLE BIT_ULL(48) +#define CGX_DMAC_CAM_ENTRY_LMACID GENMASK_ULL(50, 49) #define CGXX_CMRX_RX_DMAC_CAM1 0x400 #define CGX_RX_DMAC_ADR_MASK GENMASK_ULL(47, 0) #define CGXX_CMRX_TX_STAT0 0x700 @@ -172,4 +173,6 @@ unsigned long cgx_get_lmac_bmap(void *cgxd); void cgx_lmac_write(int cgx_id, int lmac_id, u64 offset, u64 val); u64 cgx_lmac_read(int cgx_id, int lmac_id, u64 offset); int cgx_lmac_addr_update(u8 cgx_id, u8 lmac_id, u8 *mac_addr, u8 index); +u64 cgx_read_dmac_ctrl(void *cgxd, int lmac_id); +u64 cgx_read_dmac_entry(void *cgxd, int index); #endif /* CGX_H */ diff --git a/drivers/net/ethernet/marvell/octeontx2/af/rvu.h b/drivers/net/ethernet/marvell/octeontx2/af/rvu.h index bc0d245070338e..10e58a5d5861ac 100644 --- a/drivers/net/ethernet/marvell/octeontx2/af/rvu.h +++ b/drivers/net/ethernet/marvell/octeontx2/af/rvu.h @@ -744,6 +744,7 @@ void npc_read_mcam_entry(struct rvu *rvu, struct npc_mcam *mcam, bool is_mac_feature_supported(struct rvu *rvu, int pf, int feature); u32 rvu_cgx_get_fifolen(struct rvu *rvu); void *rvu_first_cgx_pdata(struct rvu *rvu); +int cgxlmac_to_pf(struct rvu *rvu, int cgx_id, int lmac_id); int npc_get_nixlf_mcam_index(struct npc_mcam *mcam, u16 pcifunc, int nixlf, int type); diff --git a/drivers/net/ethernet/marvell/octeontx2/af/rvu_cgx.c b/drivers/net/ethernet/marvell/octeontx2/af/rvu_cgx.c index 9c6f4ba2d7268b..6cc8fbb7190cd6 100644 --- a/drivers/net/ethernet/marvell/octeontx2/af/rvu_cgx.c +++ b/drivers/net/ethernet/marvell/octeontx2/af/rvu_cgx.c @@ -63,7 +63,7 @@ static u16 cgxlmac_to_pfmap(struct rvu *rvu, u8 cgx_id, u8 lmac_id) return rvu->cgxlmac2pf_map[CGX_OFFSET(cgx_id) + lmac_id]; } -static int cgxlmac_to_pf(struct rvu *rvu, int cgx_id, int lmac_id) +int cgxlmac_to_pf(struct rvu *rvu, int cgx_id, int lmac_id) { unsigned long pfmap; diff --git a/drivers/net/ethernet/marvell/octeontx2/af/rvu_debugfs.c b/drivers/net/ethernet/marvell/octeontx2/af/rvu_debugfs.c index 3cc3c6fd1d84f2..370d4ca1e5edbd 100644 --- a/drivers/net/ethernet/marvell/octeontx2/af/rvu_debugfs.c +++ b/drivers/net/ethernet/marvell/octeontx2/af/rvu_debugfs.c @@ -1971,10 +1971,9 @@ static int cgx_print_stats(struct seq_file *s, int lmac_id) return err; } -static int rvu_dbg_cgx_stat_display(struct seq_file *filp, void *unused) +static int rvu_dbg_derive_lmacid(struct seq_file *filp, int *lmac_id) { struct dentry *current_dir; - int err, lmac_id; char *buf; current_dir = filp->file->f_path.dentry->d_parent; @@ -1982,17 +1981,87 @@ static int rvu_dbg_cgx_stat_display(struct seq_file *filp, void *unused) if (!buf) return -EINVAL; - err = kstrtoint(buf + 1, 10, &lmac_id); - if (!err) { - err = cgx_print_stats(filp, lmac_id); - if (err) - return err; - } + return kstrtoint(buf + 1, 10, lmac_id); +} + +static int rvu_dbg_cgx_stat_display(struct seq_file *filp, void *unused) +{ + int lmac_id, err; + + err = rvu_dbg_derive_lmacid(filp, &lmac_id); + if (!err) + return cgx_print_stats(filp, lmac_id); + return err; } RVU_DEBUG_SEQ_FOPS(cgx_stat, cgx_stat_display, NULL); +static int cgx_print_dmac_flt(struct seq_file *s, int lmac_id) +{ + struct pci_dev *pdev = NULL; + void *cgxd = s->private; + char *bcast, *mcast; + u16 index, domain; + u8 dmac[ETH_ALEN]; + struct rvu *rvu; + u64 cfg, mac; + int pf; + + rvu = pci_get_drvdata(pci_get_device(PCI_VENDOR_ID_CAVIUM, + PCI_DEVID_OCTEONTX2_RVU_AF, NULL)); + if (!rvu) + return -ENODEV; + + pf = cgxlmac_to_pf(rvu, cgx_get_cgxid(cgxd), lmac_id); + domain = 2; + + pdev = pci_get_domain_bus_and_slot(domain, pf + 1, 0); + if (!pdev) + return 0; + + cfg = cgx_read_dmac_ctrl(cgxd, lmac_id); + bcast = cfg & CGX_DMAC_BCAST_MODE ? "ACCEPT" : "REJECT"; + mcast = cfg & CGX_DMAC_MCAST_MODE ? "ACCEPT" : "REJECT"; + + seq_puts(s, + "PCI dev RVUPF BROADCAST MULTICAST FILTER-MODE\n"); + seq_printf(s, "%s PF%d %9s %9s", + dev_name(&pdev->dev), pf, bcast, mcast); + if (cfg & CGX_DMAC_CAM_ACCEPT) + seq_printf(s, "%12s\n\n", "UNICAST"); + else + seq_printf(s, "%16s\n\n", "PROMISCUOUS"); + + seq_puts(s, "\nDMAC-INDEX ADDRESS\n"); + + for (index = 0 ; index < 32 ; index++) { + cfg = cgx_read_dmac_entry(cgxd, index); + /* Display enabled dmac entries associated with current lmac */ + if (lmac_id == FIELD_GET(CGX_DMAC_CAM_ENTRY_LMACID, cfg) && + FIELD_GET(CGX_DMAC_CAM_ADDR_ENABLE, cfg)) { + mac = FIELD_GET(CGX_RX_DMAC_ADR_MASK, cfg); + u64_to_ether_addr(mac, dmac); + seq_printf(s, "%7d %pM\n", index, dmac); + } + } + + return 0; +} + +static int rvu_dbg_cgx_dmac_flt_display(struct seq_file *filp, void *unused) +{ + int err, lmac_id; + + err = rvu_dbg_derive_lmacid(filp, &lmac_id); + if (!err) + return cgx_print_dmac_flt(filp, lmac_id); + + return err; +} + +RVU_DEBUG_SEQ_FOPS(cgx_dmac_flt, cgx_dmac_flt_display, NULL); + static void rvu_dbg_cgx_init(struct rvu *rvu) { struct mac_ops *mac_ops; @@ -2029,6 +2098,9 @@ static void rvu_dbg_cgx_init(struct rvu *rvu) debugfs_create_file("stats", 0600, rvu->rvu_dbg.lmac, cgx, &rvu_dbg_cgx_stat_fops); + debugfs_create_file("mac_filter", 0600, + rvu->rvu_dbg.lmac, cgx, + &rvu_dbg_cgx_dmac_flt_fops); } } } From 79d2be385e9eabe4403eb85bcc7d3efc6b936a76 Mon Sep 17 00:00:00 2001 From: Hariprasad Kelam Date: Wed, 30 Jun 2021 15:40:59 +0530 Subject: [PATCH 058/714] octeontx2-pf: offload DMAC filters to CGX/RPM block DMAC filtering can be achieved by either NPC MCAM rules or CGX/RPM MAC filters. Currently we are achieving this by NPC MCAM rules. This patch offloads DMAC filters to CGX/RPM MAC filters instead of NPC MCAM rules. Offloading DMAC filter to CGX/RPM block helps in reducing traffic to NPC block and save MCAM rules Signed-off-by: Hariprasad Kelam Signed-off-by: Sunil Kovvuri Goutham Signed-off-by: David S. Miller --- .../ethernet/marvell/octeontx2/nic/Makefile | 2 +- .../marvell/octeontx2/nic/otx2_common.c | 3 + .../marvell/octeontx2/nic/otx2_common.h | 11 + .../marvell/octeontx2/nic/otx2_dmac_flt.c | 173 +++++++++++++ .../marvell/octeontx2/nic/otx2_flows.c | 229 +++++++++++++++++- .../ethernet/marvell/octeontx2/nic/otx2_pf.c | 9 + 6 files changed, 417 insertions(+), 10 deletions(-) create mode 100644 drivers/net/ethernet/marvell/octeontx2/nic/otx2_dmac_flt.c diff --git a/drivers/net/ethernet/marvell/octeontx2/nic/Makefile b/drivers/net/ethernet/marvell/octeontx2/nic/Makefile index 457c94793e63b9..3254b02205ca59 100644 --- a/drivers/net/ethernet/marvell/octeontx2/nic/Makefile +++ b/drivers/net/ethernet/marvell/octeontx2/nic/Makefile @@ -7,7 +7,7 @@ obj-$(CONFIG_OCTEONTX2_PF) += rvu_nicpf.o obj-$(CONFIG_OCTEONTX2_VF) += rvu_nicvf.o rvu_nicpf-y := otx2_pf.o otx2_common.o otx2_txrx.o otx2_ethtool.o \ - otx2_ptp.o otx2_flows.o otx2_tc.o cn10k.o + otx2_ptp.o otx2_flows.o otx2_tc.o cn10k.o otx2_dmac_flt.o rvu_nicvf-y := otx2_vf.o ccflags-y += -I$(srctree)/drivers/net/ethernet/marvell/octeontx2/af diff --git a/drivers/net/ethernet/marvell/octeontx2/nic/otx2_common.c b/drivers/net/ethernet/marvell/octeontx2/nic/otx2_common.c index cf7875d51d8793..7cccd802c4ed0e 100644 --- a/drivers/net/ethernet/marvell/octeontx2/nic/otx2_common.c +++ b/drivers/net/ethernet/marvell/octeontx2/nic/otx2_common.c @@ -210,6 +210,9 @@ int otx2_set_mac_address(struct net_device *netdev, void *p) /* update dmac field in vlan offload rule */ if (pfvf->flags & OTX2_FLAG_RX_VLAN_SUPPORT) otx2_install_rxvlan_offload_flow(pfvf); + /* update dmac address in ntuple and DMAC filter list */ + if (pfvf->flags & OTX2_FLAG_DMACFLTR_SUPPORT) + otx2_dmacflt_update_pfmac_flow(pfvf); } else { return -EPERM; } diff --git a/drivers/net/ethernet/marvell/octeontx2/nic/otx2_common.h b/drivers/net/ethernet/marvell/octeontx2/nic/otx2_common.h index 20a9c69f020fc4..8fd58cd07f50b8 100644 --- a/drivers/net/ethernet/marvell/octeontx2/nic/otx2_common.h +++ b/drivers/net/ethernet/marvell/octeontx2/nic/otx2_common.h @@ -288,6 +288,9 @@ struct otx2_flow_config { u16 tc_flower_offset; u16 ntuple_max_flows; u16 tc_max_flows; + u8 dmacflt_max_flows; + u8 *bmap_to_dmacindex; + unsigned long dmacflt_bmap; struct list_head flow_list; }; @@ -329,6 +332,7 @@ struct otx2_nic { #define OTX2_FLAG_TC_FLOWER_SUPPORT BIT_ULL(11) #define OTX2_FLAG_TC_MATCHALL_EGRESS_ENABLED BIT_ULL(12) #define OTX2_FLAG_TC_MATCHALL_INGRESS_ENABLED BIT_ULL(13) +#define OTX2_FLAG_DMACFLTR_SUPPORT BIT_ULL(14) u64 flags; struct otx2_qset qset; @@ -834,4 +838,11 @@ int otx2_init_tc(struct otx2_nic *nic); void otx2_shutdown_tc(struct otx2_nic *nic); int otx2_setup_tc(struct net_device *netdev, enum tc_setup_type type, void *type_data); +/* CGX/RPM DMAC filters support */ +int otx2_dmacflt_get_max_cnt(struct otx2_nic *pf); +int otx2_dmacflt_add(struct otx2_nic *pf, const u8 *mac, u8 bit_pos); +int otx2_dmacflt_remove(struct otx2_nic *pf, const u8 *mac, u8 bit_pos); +int otx2_dmacflt_update(struct otx2_nic *pf, u8 *mac, u8 bit_pos); +void otx2_dmacflt_reinstall_flows(struct otx2_nic *pf); +void otx2_dmacflt_update_pfmac_flow(struct otx2_nic *pfvf); #endif /* OTX2_COMMON_H */ diff --git a/drivers/net/ethernet/marvell/octeontx2/nic/otx2_dmac_flt.c b/drivers/net/ethernet/marvell/octeontx2/nic/otx2_dmac_flt.c new file mode 100644 index 00000000000000..ffe3e94562d0b9 --- /dev/null +++ b/drivers/net/ethernet/marvell/octeontx2/nic/otx2_dmac_flt.c @@ -0,0 +1,173 @@ +// SPDX-License-Identifier: GPL-2.0 +/* Marvell OcteonTx2 RVU Physcial Function ethernet driver + * + * Copyright (C) 2021 Marvell. + */ + +#include "otx2_common.h" + +static int otx2_dmacflt_do_add(struct otx2_nic *pf, const u8 *mac, + u8 *dmac_index) +{ + struct cgx_mac_addr_add_req *req; + struct cgx_mac_addr_add_rsp *rsp; + int err; + + mutex_lock(&pf->mbox.lock); + + req = otx2_mbox_alloc_msg_cgx_mac_addr_add(&pf->mbox); + if (!req) { + mutex_unlock(&pf->mbox.lock); + return -ENOMEM; + } + + ether_addr_copy(req->mac_addr, mac); + err = otx2_sync_mbox_msg(&pf->mbox); + + if (!err) { + rsp = (struct cgx_mac_addr_add_rsp *) + otx2_mbox_get_rsp(&pf->mbox.mbox, 0, &req->hdr); + *dmac_index = rsp->index; + } + + mutex_unlock(&pf->mbox.lock); + return err; +} + +static int otx2_dmacflt_add_pfmac(struct otx2_nic *pf) +{ + struct cgx_mac_addr_set_or_get *req; + int err; + + mutex_lock(&pf->mbox.lock); + + req = otx2_mbox_alloc_msg_cgx_mac_addr_set(&pf->mbox); + if (!req) { + mutex_unlock(&pf->mbox.lock); + return -ENOMEM; + } + + ether_addr_copy(req->mac_addr, pf->netdev->dev_addr); + err = otx2_sync_mbox_msg(&pf->mbox); + + mutex_unlock(&pf->mbox.lock); + return err; +} + +int otx2_dmacflt_add(struct otx2_nic *pf, const u8 *mac, u8 bit_pos) +{ + u8 *dmacindex; + + /* Store dmacindex returned by CGX/RPM driver which will + * be used for macaddr update/remove + */ + dmacindex = &pf->flow_cfg->bmap_to_dmacindex[bit_pos]; + + if (ether_addr_equal(mac, pf->netdev->dev_addr)) + return otx2_dmacflt_add_pfmac(pf); + else + return otx2_dmacflt_do_add(pf, mac, dmacindex); +} + +static int otx2_dmacflt_do_remove(struct otx2_nic *pfvf, const u8 *mac, + u8 dmac_index) +{ + struct cgx_mac_addr_del_req *req; + int err; + + mutex_lock(&pfvf->mbox.lock); + req = otx2_mbox_alloc_msg_cgx_mac_addr_del(&pfvf->mbox); + if (!req) { + mutex_unlock(&pfvf->mbox.lock); + return -ENOMEM; + } + + req->index = dmac_index; + + err = otx2_sync_mbox_msg(&pfvf->mbox); + mutex_unlock(&pfvf->mbox.lock); + + return err; +} + +static int otx2_dmacflt_remove_pfmac(struct otx2_nic *pf) +{ + struct msg_req *req; + int err; + + mutex_lock(&pf->mbox.lock); + req = otx2_mbox_alloc_msg_cgx_mac_addr_reset(&pf->mbox); + if (!req) { + mutex_unlock(&pf->mbox.lock); + return -ENOMEM; + } + + err = otx2_sync_mbox_msg(&pf->mbox); + + mutex_unlock(&pf->mbox.lock); + return err; +} + +int otx2_dmacflt_remove(struct otx2_nic *pf, const u8 *mac, + u8 bit_pos) +{ + u8 dmacindex = pf->flow_cfg->bmap_to_dmacindex[bit_pos]; + + if (ether_addr_equal(mac, pf->netdev->dev_addr)) + return otx2_dmacflt_remove_pfmac(pf); + else + return otx2_dmacflt_do_remove(pf, mac, dmacindex); +} + +/* CGX/RPM blocks support max unicast entries of 32. + * on typical configuration MAC block associated + * with 4 lmacs, each lmac will have 8 dmac entries + */ +int otx2_dmacflt_get_max_cnt(struct otx2_nic *pf) +{ + struct cgx_max_dmac_entries_get_rsp *rsp; + struct msg_req *msg; + int err; + + mutex_lock(&pf->mbox.lock); + msg = otx2_mbox_alloc_msg_cgx_mac_max_entries_get(&pf->mbox); + + if (!msg) { + mutex_unlock(&pf->mbox.lock); + return -ENOMEM; + } + + err = otx2_sync_mbox_msg(&pf->mbox); + if (err) + goto out; + + rsp = (struct cgx_max_dmac_entries_get_rsp *) + otx2_mbox_get_rsp(&pf->mbox.mbox, 0, &msg->hdr); + pf->flow_cfg->dmacflt_max_flows = rsp->max_dmac_filters; + +out: + mutex_unlock(&pf->mbox.lock); + return err; +} + +int otx2_dmacflt_update(struct otx2_nic *pf, u8 *mac, u8 bit_pos) +{ + struct cgx_mac_addr_update_req *req; + int rc; + + mutex_lock(&pf->mbox.lock); + + req = otx2_mbox_alloc_msg_cgx_mac_addr_update(&pf->mbox); + + if (!req) { + mutex_unlock(&pf->mbox.lock); + rc = -ENOMEM; + } + + ether_addr_copy(req->mac_addr, mac); + req->index = pf->flow_cfg->bmap_to_dmacindex[bit_pos]; + rc = otx2_sync_mbox_msg(&pf->mbox); + + mutex_unlock(&pf->mbox.lock); + return rc; +} diff --git a/drivers/net/ethernet/marvell/octeontx2/nic/otx2_flows.c b/drivers/net/ethernet/marvell/octeontx2/nic/otx2_flows.c index 8c97106bdd1cf8..4d9de525802d00 100644 --- a/drivers/net/ethernet/marvell/octeontx2/nic/otx2_flows.c +++ b/drivers/net/ethernet/marvell/octeontx2/nic/otx2_flows.c @@ -18,6 +18,12 @@ struct otx2_flow { bool is_vf; u8 rss_ctx_id; int vf; + bool dmac_filter; +}; + +enum dmac_req { + DMAC_ADDR_UPDATE, + DMAC_ADDR_DEL }; static void otx2_clear_ntuple_flow_info(struct otx2_nic *pfvf, struct otx2_flow_config *flow_cfg) @@ -219,6 +225,22 @@ int otx2_mcam_flow_init(struct otx2_nic *pf) if (!pf->mac_table) return -ENOMEM; + otx2_dmacflt_get_max_cnt(pf); + + /* DMAC filters are not allocated */ + if (!pf->flow_cfg->dmacflt_max_flows) + return 0; + + pf->flow_cfg->bmap_to_dmacindex = + devm_kzalloc(pf->dev, sizeof(u8) * + pf->flow_cfg->dmacflt_max_flows, + GFP_KERNEL); + + if (!pf->flow_cfg->bmap_to_dmacindex) + return -ENOMEM; + + pf->flags |= OTX2_FLAG_DMACFLTR_SUPPORT; + return 0; } @@ -280,6 +302,12 @@ int otx2_add_macfilter(struct net_device *netdev, const u8 *mac) { struct otx2_nic *pf = netdev_priv(netdev); + if (bitmap_weight(&pf->flow_cfg->dmacflt_bmap, + pf->flow_cfg->dmacflt_max_flows)) + netdev_warn(netdev, + "Add %pM to CGX/RPM DMAC filters list as well\n", + mac); + return otx2_do_add_macfilter(pf, mac); } @@ -351,12 +379,22 @@ static void otx2_add_flow_to_list(struct otx2_nic *pfvf, struct otx2_flow *flow) list_add(&flow->list, head); } +static int otx2_get_maxflows(struct otx2_flow_config *flow_cfg) +{ + if (flow_cfg->nr_flows == flow_cfg->ntuple_max_flows || + bitmap_weight(&flow_cfg->dmacflt_bmap, + flow_cfg->dmacflt_max_flows)) + return flow_cfg->ntuple_max_flows + flow_cfg->dmacflt_max_flows; + else + return flow_cfg->ntuple_max_flows; +} + int otx2_get_flow(struct otx2_nic *pfvf, struct ethtool_rxnfc *nfc, u32 location) { struct otx2_flow *iter; - if (location >= pfvf->flow_cfg->ntuple_max_flows) + if (location >= otx2_get_maxflows(pfvf->flow_cfg)) return -EINVAL; list_for_each_entry(iter, &pfvf->flow_cfg->flow_list, list) { @@ -378,7 +416,7 @@ int otx2_get_all_flows(struct otx2_nic *pfvf, struct ethtool_rxnfc *nfc, int idx = 0; int err = 0; - nfc->data = pfvf->flow_cfg->ntuple_max_flows; + nfc->data = otx2_get_maxflows(pfvf->flow_cfg); while ((!err || err == -ENOENT) && idx < rule_cnt) { err = otx2_get_flow(pfvf, nfc, location); if (!err) @@ -760,6 +798,32 @@ int otx2_prepare_flow_request(struct ethtool_rx_flow_spec *fsp, return 0; } +static int otx2_is_flow_rule_dmacfilter(struct otx2_nic *pfvf, + struct ethtool_rx_flow_spec *fsp) +{ + struct ethhdr *eth_mask = &fsp->m_u.ether_spec; + struct ethhdr *eth_hdr = &fsp->h_u.ether_spec; + u64 ring_cookie = fsp->ring_cookie; + u32 flow_type; + + if (!(pfvf->flags & OTX2_FLAG_DMACFLTR_SUPPORT)) + return false; + + flow_type = fsp->flow_type & ~(FLOW_EXT | FLOW_MAC_EXT | FLOW_RSS); + + /* CGX/RPM block dmac filtering configured for white listing + * check for action other than DROP + */ + if (flow_type == ETHER_FLOW && ring_cookie != RX_CLS_FLOW_DISC && + !ethtool_get_flow_spec_ring_vf(ring_cookie)) { + if (is_zero_ether_addr(eth_mask->h_dest) && + is_valid_ether_addr(eth_hdr->h_dest)) + return true; + } + + return false; +} + static int otx2_add_flow_msg(struct otx2_nic *pfvf, struct otx2_flow *flow) { u64 ring_cookie = flow->flow_spec.ring_cookie; @@ -818,14 +882,46 @@ static int otx2_add_flow_msg(struct otx2_nic *pfvf, struct otx2_flow *flow) return err; } +static int otx2_add_flow_with_pfmac(struct otx2_nic *pfvf, + struct otx2_flow *flow) +{ + struct otx2_flow *pf_mac; + struct ethhdr *eth_hdr; + + pf_mac = kzalloc(sizeof(*pf_mac), GFP_KERNEL); + if (!pf_mac) + return -ENOMEM; + + pf_mac->entry = 0; + pf_mac->dmac_filter = true; + pf_mac->location = pfvf->flow_cfg->ntuple_max_flows; + memcpy(&pf_mac->flow_spec, &flow->flow_spec, + sizeof(struct ethtool_rx_flow_spec)); + pf_mac->flow_spec.location = pf_mac->location; + + /* Copy PF mac address */ + eth_hdr = &pf_mac->flow_spec.h_u.ether_spec; + ether_addr_copy(eth_hdr->h_dest, pfvf->netdev->dev_addr); + + /* Install DMAC filter with PF mac address */ + otx2_dmacflt_add(pfvf, eth_hdr->h_dest, 0); + + otx2_add_flow_to_list(pfvf, pf_mac); + pfvf->flow_cfg->nr_flows++; + set_bit(0, &pfvf->flow_cfg->dmacflt_bmap); + + return 0; +} + int otx2_add_flow(struct otx2_nic *pfvf, struct ethtool_rxnfc *nfc) { struct otx2_flow_config *flow_cfg = pfvf->flow_cfg; struct ethtool_rx_flow_spec *fsp = &nfc->fs; struct otx2_flow *flow; + struct ethhdr *eth_hdr; bool new = false; + int err = 0; u32 ring; - int err; ring = ethtool_get_flow_spec_ring(fsp->ring_cookie); if (!(pfvf->flags & OTX2_FLAG_NTUPLE_SUPPORT)) @@ -834,16 +930,15 @@ int otx2_add_flow(struct otx2_nic *pfvf, struct ethtool_rxnfc *nfc) if (ring >= pfvf->hw.rx_queues && fsp->ring_cookie != RX_CLS_FLOW_DISC) return -EINVAL; - if (fsp->location >= flow_cfg->ntuple_max_flows) + if (fsp->location >= otx2_get_maxflows(flow_cfg)) return -EINVAL; flow = otx2_find_flow(pfvf, fsp->location); if (!flow) { - flow = kzalloc(sizeof(*flow), GFP_ATOMIC); + flow = kzalloc(sizeof(*flow), GFP_KERNEL); if (!flow) return -ENOMEM; flow->location = fsp->location; - flow->entry = flow_cfg->flow_ent[flow->location]; new = true; } /* struct copy */ @@ -852,7 +947,54 @@ int otx2_add_flow(struct otx2_nic *pfvf, struct ethtool_rxnfc *nfc) if (fsp->flow_type & FLOW_RSS) flow->rss_ctx_id = nfc->rss_context; - err = otx2_add_flow_msg(pfvf, flow); + if (otx2_is_flow_rule_dmacfilter(pfvf, &flow->flow_spec)) { + eth_hdr = &flow->flow_spec.h_u.ether_spec; + + /* Sync dmac filter table with updated fields */ + if (flow->dmac_filter) + return otx2_dmacflt_update(pfvf, eth_hdr->h_dest, + flow->entry); + + if (bitmap_full(&flow_cfg->dmacflt_bmap, + flow_cfg->dmacflt_max_flows)) { + netdev_warn(pfvf->netdev, + "Can't insert the rule %d as max allowed dmac filters are %d\n", + flow->location + + flow_cfg->dmacflt_max_flows, + flow_cfg->dmacflt_max_flows); + err = -EINVAL; + if (new) + kfree(flow); + return err; + } + + /* Install PF mac address to DMAC filter list */ + if (!test_bit(0, &flow_cfg->dmacflt_bmap)) + otx2_add_flow_with_pfmac(pfvf, flow); + + flow->dmac_filter = true; + flow->entry = find_first_zero_bit(&flow_cfg->dmacflt_bmap, + flow_cfg->dmacflt_max_flows); + fsp->location = flow_cfg->ntuple_max_flows + flow->entry; + flow->flow_spec.location = fsp->location; + flow->location = fsp->location; + + set_bit(flow->entry, &flow_cfg->dmacflt_bmap); + otx2_dmacflt_add(pfvf, eth_hdr->h_dest, flow->entry); + + } else { + if (flow->location >= pfvf->flow_cfg->ntuple_max_flows) { + netdev_warn(pfvf->netdev, + "Can't insert non dmac ntuple rule at %d, allowed range %d-0\n", + flow->location, + flow_cfg->ntuple_max_flows - 1); + err = -EINVAL; + } else { + flow->entry = flow_cfg->flow_ent[flow->location]; + err = otx2_add_flow_msg(pfvf, flow); + } + } + if (err) { if (new) kfree(flow); @@ -890,20 +1032,70 @@ static int otx2_remove_flow_msg(struct otx2_nic *pfvf, u16 entry, bool all) return err; } +static void otx2_update_rem_pfmac(struct otx2_nic *pfvf, int req) +{ + struct otx2_flow *iter; + struct ethhdr *eth_hdr; + bool found = false; + + list_for_each_entry(iter, &pfvf->flow_cfg->flow_list, list) { + if (iter->dmac_filter && iter->entry == 0) { + eth_hdr = &iter->flow_spec.h_u.ether_spec; + if (req == DMAC_ADDR_DEL) { + otx2_dmacflt_remove(pfvf, eth_hdr->h_dest, + 0); + clear_bit(0, &pfvf->flow_cfg->dmacflt_bmap); + found = true; + } else { + ether_addr_copy(eth_hdr->h_dest, + pfvf->netdev->dev_addr); + otx2_dmacflt_update(pfvf, eth_hdr->h_dest, 0); + } + break; + } + } + + if (found) { + list_del(&iter->list); + kfree(iter); + pfvf->flow_cfg->nr_flows--; + } +} + int otx2_remove_flow(struct otx2_nic *pfvf, u32 location) { struct otx2_flow_config *flow_cfg = pfvf->flow_cfg; struct otx2_flow *flow; int err; - if (location >= flow_cfg->ntuple_max_flows) + if (location >= otx2_get_maxflows(flow_cfg)) return -EINVAL; flow = otx2_find_flow(pfvf, location); if (!flow) return -ENOENT; - err = otx2_remove_flow_msg(pfvf, flow->entry, false); + if (flow->dmac_filter) { + struct ethhdr *eth_hdr = &flow->flow_spec.h_u.ether_spec; + + /* user not allowed to remove dmac filter with interface mac */ + if (ether_addr_equal(pfvf->netdev->dev_addr, eth_hdr->h_dest)) + return -EPERM; + + err = otx2_dmacflt_remove(pfvf, eth_hdr->h_dest, + flow->entry); + clear_bit(flow->entry, &flow_cfg->dmacflt_bmap); + /* If all dmac filters are removed delete macfilter with + * interface mac address and configure CGX/RPM block in + * promiscuous mode + */ + if (bitmap_weight(&flow_cfg->dmacflt_bmap, + flow_cfg->dmacflt_max_flows) == 1) + otx2_update_rem_pfmac(pfvf, DMAC_ADDR_DEL); + } else { + err = otx2_remove_flow_msg(pfvf, flow->entry, false); + } + if (err) return err; @@ -1100,3 +1292,22 @@ int otx2_enable_rxvlan(struct otx2_nic *pf, bool enable) mutex_unlock(&pf->mbox.lock); return rsp_hdr->rc; } + +void otx2_dmacflt_reinstall_flows(struct otx2_nic *pf) +{ + struct otx2_flow *iter; + struct ethhdr *eth_hdr; + + list_for_each_entry(iter, &pf->flow_cfg->flow_list, list) { + if (iter->dmac_filter) { + eth_hdr = &iter->flow_spec.h_u.ether_spec; + otx2_dmacflt_add(pf, eth_hdr->h_dest, + iter->entry); + } + } +} + +void otx2_dmacflt_update_pfmac_flow(struct otx2_nic *pfvf) +{ + otx2_update_rem_pfmac(pfvf, DMAC_ADDR_UPDATE); +} diff --git a/drivers/net/ethernet/marvell/octeontx2/nic/otx2_pf.c b/drivers/net/ethernet/marvell/octeontx2/nic/otx2_pf.c index 088c28df849d09..f300b807a85bf7 100644 --- a/drivers/net/ethernet/marvell/octeontx2/nic/otx2_pf.c +++ b/drivers/net/ethernet/marvell/octeontx2/nic/otx2_pf.c @@ -1110,6 +1110,11 @@ static int otx2_cgx_config_loopback(struct otx2_nic *pf, bool enable) struct msg_req *msg; int err; + if (enable && bitmap_weight(&pf->flow_cfg->dmacflt_bmap, + pf->flow_cfg->dmacflt_max_flows)) + netdev_warn(pf->netdev, + "CGX/RPM internal loopback might not work as DMAC filters are active\n"); + mutex_lock(&pf->mbox.lock); if (enable) msg = otx2_mbox_alloc_msg_cgx_intlbk_enable(&pf->mbox); @@ -1644,6 +1649,10 @@ int otx2_open(struct net_device *netdev) /* Restore pause frame settings */ otx2_config_pause_frm(pf); + /* Install DMAC Filters */ + if (pf->flags & OTX2_FLAG_DMACFLTR_SUPPORT) + otx2_dmacflt_reinstall_flows(pf); + err = otx2_rxtx_enable(pf, true); if (err) goto err_tx_stop_queues; From 856a5c97268d474282360c8a3cf4f37f6036dbec Mon Sep 17 00:00:00 2001 From: M Chetan Kumar Date: Thu, 1 Jul 2021 20:37:06 +0530 Subject: [PATCH 059/714] net: wwan: iosm: fix uevent reporting Change uevent env variable name to IOSM_EVENT & correct reporting format to key=value pair. Signed-off-by: M Chetan Kumar Signed-off-by: David S. Miller --- drivers/net/wwan/iosm/iosm_ipc_uevent.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/wwan/iosm/iosm_ipc_uevent.c b/drivers/net/wwan/iosm/iosm_ipc_uevent.c index 2229d752926c67..d12188ffed7e4f 100644 --- a/drivers/net/wwan/iosm/iosm_ipc_uevent.c +++ b/drivers/net/wwan/iosm/iosm_ipc_uevent.c @@ -37,7 +37,7 @@ void ipc_uevent_send(struct device *dev, char *uevent) /* Store the device and event information */ info->dev = dev; - snprintf(info->uevent, MAX_UEVENT_LEN, "%s: %s", dev_name(dev), uevent); + snprintf(info->uevent, MAX_UEVENT_LEN, "IOSM_EVENT=%s", uevent); /* Schedule uevent in process context using work queue */ schedule_work(&info->work); From 3bcfc0a2d3199d0a83d47ed67ad57a1c2f5a16d7 Mon Sep 17 00:00:00 2001 From: M Chetan Kumar Date: Thu, 1 Jul 2021 20:37:31 +0530 Subject: [PATCH 060/714] net: wwan: iosm: remove reduandant check Remove reduandant IP session id check since required checks are in place under caller. Signed-off-by: M Chetan Kumar Signed-off-by: David S. Miller --- drivers/net/wwan/iosm/iosm_ipc_imem_ops.c | 19 +++---------------- 1 file changed, 3 insertions(+), 16 deletions(-) diff --git a/drivers/net/wwan/iosm/iosm_ipc_imem_ops.c b/drivers/net/wwan/iosm/iosm_ipc_imem_ops.c index 46f76e8aae9291..e4e9461b603eb6 100644 --- a/drivers/net/wwan/iosm/iosm_ipc_imem_ops.c +++ b/drivers/net/wwan/iosm/iosm_ipc_imem_ops.c @@ -24,15 +24,7 @@ int ipc_imem_sys_wwan_open(struct iosm_imem *ipc_imem, int if_id) return -EIO; } - /* check for the interafce id - * if if_id 1 to 8 then create IP MUX channel sessions. - * To start MUX session from 0 as network interface id would start - * from 1 so map it to if_id = if_id - 1 - */ - if (if_id >= IP_MUX_SESSION_START && if_id <= IP_MUX_SESSION_END) - return ipc_mux_open_session(ipc_imem->mux, if_id - 1); - - return -EINVAL; + return ipc_mux_open_session(ipc_imem->mux, if_id - 1); } /* Release a net link to CP. */ @@ -83,13 +75,8 @@ int ipc_imem_sys_wwan_transmit(struct iosm_imem *ipc_imem, goto out; } - if (if_id >= IP_MUX_SESSION_START && if_id <= IP_MUX_SESSION_END) - /* Route the UL packet through IP MUX Layer */ - ret = ipc_mux_ul_trigger_encode(ipc_imem->mux, - if_id - 1, skb); - else - dev_err(ipc_imem->dev, - "invalid if_id %d: ", if_id); + /* Route the UL packet through IP MUX Layer */ + ret = ipc_mux_ul_trigger_encode(ipc_imem->mux, if_id - 1, skb); out: return ret; } From 5bb4eea0c5f5b9383a543293966bdf20e54988aa Mon Sep 17 00:00:00 2001 From: M Chetan Kumar Date: Thu, 1 Jul 2021 20:37:45 +0530 Subject: [PATCH 061/714] net: wwan: iosm: correct link-id handling Link ID to be kept intact with MBIM session ID Ex: ID 0 should be associated to MBIM session ID 0. Reported-by: Loic Poulain Signed-off-by: M Chetan Kumar Signed-off-by: David S. Miller --- drivers/net/wwan/iosm/iosm_ipc_imem_ops.c | 6 +++--- drivers/net/wwan/iosm/iosm_ipc_imem_ops.h | 6 +++--- drivers/net/wwan/iosm/iosm_ipc_mux_codec.c | 2 +- drivers/net/wwan/iosm/iosm_ipc_wwan.c | 4 ++-- 4 files changed, 9 insertions(+), 9 deletions(-) diff --git a/drivers/net/wwan/iosm/iosm_ipc_imem_ops.c b/drivers/net/wwan/iosm/iosm_ipc_imem_ops.c index e4e9461b603eb6..0a472ce773700d 100644 --- a/drivers/net/wwan/iosm/iosm_ipc_imem_ops.c +++ b/drivers/net/wwan/iosm/iosm_ipc_imem_ops.c @@ -24,7 +24,7 @@ int ipc_imem_sys_wwan_open(struct iosm_imem *ipc_imem, int if_id) return -EIO; } - return ipc_mux_open_session(ipc_imem->mux, if_id - 1); + return ipc_mux_open_session(ipc_imem->mux, if_id); } /* Release a net link to CP. */ @@ -33,7 +33,7 @@ void ipc_imem_sys_wwan_close(struct iosm_imem *ipc_imem, int if_id, { if (ipc_imem->mux && if_id >= IP_MUX_SESSION_START && if_id <= IP_MUX_SESSION_END) - ipc_mux_close_session(ipc_imem->mux, if_id - 1); + ipc_mux_close_session(ipc_imem->mux, if_id); } /* Tasklet call to do uplink transfer. */ @@ -76,7 +76,7 @@ int ipc_imem_sys_wwan_transmit(struct iosm_imem *ipc_imem, } /* Route the UL packet through IP MUX Layer */ - ret = ipc_mux_ul_trigger_encode(ipc_imem->mux, if_id - 1, skb); + ret = ipc_mux_ul_trigger_encode(ipc_imem->mux, if_id, skb); out: return ret; } diff --git a/drivers/net/wwan/iosm/iosm_ipc_imem_ops.h b/drivers/net/wwan/iosm/iosm_ipc_imem_ops.h index fd356dafbdd6f0..2007fe23e9a567 100644 --- a/drivers/net/wwan/iosm/iosm_ipc_imem_ops.h +++ b/drivers/net/wwan/iosm/iosm_ipc_imem_ops.h @@ -27,11 +27,11 @@ #define BOOT_CHECK_DEFAULT_TIMEOUT 400 /* IP MUX channel range */ -#define IP_MUX_SESSION_START 1 -#define IP_MUX_SESSION_END 8 +#define IP_MUX_SESSION_START 0 +#define IP_MUX_SESSION_END 7 /* Default IP MUX channel */ -#define IP_MUX_SESSION_DEFAULT 1 +#define IP_MUX_SESSION_DEFAULT 0 /** * ipc_imem_sys_port_open - Open a port link to CP. diff --git a/drivers/net/wwan/iosm/iosm_ipc_mux_codec.c b/drivers/net/wwan/iosm/iosm_ipc_mux_codec.c index e634ffc6ec086d..562de275797adc 100644 --- a/drivers/net/wwan/iosm/iosm_ipc_mux_codec.c +++ b/drivers/net/wwan/iosm/iosm_ipc_mux_codec.c @@ -288,7 +288,7 @@ static int ipc_mux_net_receive(struct iosm_mux *ipc_mux, int if_id, /* Pass the packet to the netif layer. */ dest_skb->priority = service_class; - return ipc_wwan_receive(wwan, dest_skb, false, if_id + 1); + return ipc_wwan_receive(wwan, dest_skb, false, if_id); } /* Decode Flow Credit Table in the block */ diff --git a/drivers/net/wwan/iosm/iosm_ipc_wwan.c b/drivers/net/wwan/iosm/iosm_ipc_wwan.c index c999c64001f4cb..84e37c4b0f7417 100644 --- a/drivers/net/wwan/iosm/iosm_ipc_wwan.c +++ b/drivers/net/wwan/iosm/iosm_ipc_wwan.c @@ -252,8 +252,8 @@ int ipc_wwan_receive(struct iosm_wwan *ipc_wwan, struct sk_buff *skb_arg, skb->pkt_type = PACKET_HOST; - if (if_id < (IP_MUX_SESSION_START - 1) || - if_id > (IP_MUX_SESSION_END - 1)) { + if (if_id < IP_MUX_SESSION_START || + if_id > IP_MUX_SESSION_END) { ret = -EINVAL; goto free; } From c302e3a1c86f78421d58ef564ba22519b0b039c0 Mon Sep 17 00:00:00 2001 From: M Chetan Kumar Date: Thu, 1 Jul 2021 20:39:17 +0530 Subject: [PATCH 062/714] net: wwan: iosm: fix netdev tx stats Update tx stats on successful packet consume, drop. Signed-off-by: M Chetan Kumar Signed-off-by: David S. Miller --- drivers/net/wwan/iosm/iosm_ipc_wwan.c | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/drivers/net/wwan/iosm/iosm_ipc_wwan.c b/drivers/net/wwan/iosm/iosm_ipc_wwan.c index 84e37c4b0f7417..e0c19c59c5f64f 100644 --- a/drivers/net/wwan/iosm/iosm_ipc_wwan.c +++ b/drivers/net/wwan/iosm/iosm_ipc_wwan.c @@ -107,6 +107,7 @@ static int ipc_wwan_link_transmit(struct sk_buff *skb, { struct iosm_netdev_priv *priv = wwan_netdev_drvpriv(netdev); struct iosm_wwan *ipc_wwan = priv->ipc_wwan; + unsigned int len = skb->len; int if_id = priv->if_id; int ret; @@ -123,6 +124,8 @@ static int ipc_wwan_link_transmit(struct sk_buff *skb, /* Return code of zero is success */ if (ret == 0) { + netdev->stats.tx_packets++; + netdev->stats.tx_bytes += len; ret = NETDEV_TX_OK; } else if (ret == -EBUSY) { ret = NETDEV_TX_BUSY; @@ -140,7 +143,8 @@ static int ipc_wwan_link_transmit(struct sk_buff *skb, ret); dev_kfree_skb_any(skb); - return ret; + netdev->stats.tx_dropped++; + return NETDEV_TX_OK; } /* Ops structure for wwan net link */ From d7340f46beae05227f5f4a1c8cb18e81e0c3fe0e Mon Sep 17 00:00:00 2001 From: M Chetan Kumar Date: Thu, 1 Jul 2021 20:39:34 +0530 Subject: [PATCH 063/714] net: wwan: iosm: set default mtu Set netdev default mtu size to 1500. Signed-off-by: M Chetan Kumar Signed-off-by: David S. Miller --- drivers/net/wwan/iosm/iosm_ipc_wwan.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/net/wwan/iosm/iosm_ipc_wwan.c b/drivers/net/wwan/iosm/iosm_ipc_wwan.c index e0c19c59c5f64f..b2357ad5d51712 100644 --- a/drivers/net/wwan/iosm/iosm_ipc_wwan.c +++ b/drivers/net/wwan/iosm/iosm_ipc_wwan.c @@ -162,6 +162,7 @@ static void ipc_wwan_setup(struct net_device *iosm_dev) iosm_dev->priv_flags |= IFF_NO_QUEUE; iosm_dev->type = ARPHRD_NONE; + iosm_dev->mtu = ETH_DATA_LEN; iosm_dev->min_mtu = ETH_MIN_MTU; iosm_dev->max_mtu = ETH_MAX_MTU; From 5d43f951b1ac797450bb4d230fdc960b739bea04 Mon Sep 17 00:00:00 2001 From: Yangbo Lu Date: Wed, 30 Jun 2021 16:11:52 +0800 Subject: [PATCH 064/714] ptp: add ptp virtual clock driver framework This patch is to add ptp virtual clock driver framework utilizing timecounter/cyclecounter. The patch just exports two essential APIs for PTP driver. - ptp_vclock_register() - ptp_vclock_unregister() Signed-off-by: Yangbo Lu Signed-off-by: David S. Miller --- drivers/ptp/Makefile | 2 +- drivers/ptp/ptp_private.h | 15 ++++ drivers/ptp/ptp_vclock.c | 150 +++++++++++++++++++++++++++++++ include/linux/ptp_clock_kernel.h | 4 +- 4 files changed, 169 insertions(+), 2 deletions(-) create mode 100644 drivers/ptp/ptp_vclock.c diff --git a/drivers/ptp/Makefile b/drivers/ptp/Makefile index 8673d1743faab9..28a6fe342d3eea 100644 --- a/drivers/ptp/Makefile +++ b/drivers/ptp/Makefile @@ -3,7 +3,7 @@ # Makefile for PTP 1588 clock support. # -ptp-y := ptp_clock.o ptp_chardev.o ptp_sysfs.o +ptp-y := ptp_clock.o ptp_chardev.o ptp_sysfs.o ptp_vclock.o ptp_kvm-$(CONFIG_X86) := ptp_kvm_x86.o ptp_kvm_common.o ptp_kvm-$(CONFIG_HAVE_ARM_SMCCC) := ptp_kvm_arm.o ptp_kvm_common.o obj-$(CONFIG_PTP_1588_CLOCK) += ptp.o diff --git a/drivers/ptp/ptp_private.h b/drivers/ptp/ptp_private.h index 6b97155148f119..853b79b6b30ead 100644 --- a/drivers/ptp/ptp_private.h +++ b/drivers/ptp/ptp_private.h @@ -48,6 +48,19 @@ struct ptp_clock { struct kthread_delayed_work aux_work; }; +#define info_to_vclock(d) container_of((d), struct ptp_vclock, info) +#define cc_to_vclock(d) container_of((d), struct ptp_vclock, cc) +#define dw_to_vclock(d) container_of((d), struct ptp_vclock, refresh_work) + +struct ptp_vclock { + struct ptp_clock *pclock; + struct ptp_clock_info info; + struct ptp_clock *clock; + struct cyclecounter cc; + struct timecounter tc; + spinlock_t lock; /* protects tc/cc */ +}; + /* * The function queue_cnt() is safe for readers to call without * holding q->lock. Readers use this function to verify that the queue @@ -89,4 +102,6 @@ extern const struct attribute_group *ptp_groups[]; int ptp_populate_pin_groups(struct ptp_clock *ptp); void ptp_cleanup_pin_groups(struct ptp_clock *ptp); +struct ptp_vclock *ptp_vclock_register(struct ptp_clock *pclock); +void ptp_vclock_unregister(struct ptp_vclock *vclock); #endif diff --git a/drivers/ptp/ptp_vclock.c b/drivers/ptp/ptp_vclock.c new file mode 100644 index 00000000000000..fc9205cc504d37 --- /dev/null +++ b/drivers/ptp/ptp_vclock.c @@ -0,0 +1,150 @@ +// SPDX-License-Identifier: GPL-2.0-or-later +/* + * PTP virtual clock driver + * + * Copyright 2021 NXP + */ +#include +#include "ptp_private.h" + +#define PTP_VCLOCK_CC_SHIFT 31 +#define PTP_VCLOCK_CC_MULT (1 << PTP_VCLOCK_CC_SHIFT) +#define PTP_VCLOCK_FADJ_SHIFT 9 +#define PTP_VCLOCK_FADJ_DENOMINATOR 15625ULL +#define PTP_VCLOCK_REFRESH_INTERVAL (HZ * 2) + +static int ptp_vclock_adjfine(struct ptp_clock_info *ptp, long scaled_ppm) +{ + struct ptp_vclock *vclock = info_to_vclock(ptp); + unsigned long flags; + s64 adj; + + adj = (s64)scaled_ppm << PTP_VCLOCK_FADJ_SHIFT; + adj = div_s64(adj, PTP_VCLOCK_FADJ_DENOMINATOR); + + spin_lock_irqsave(&vclock->lock, flags); + timecounter_read(&vclock->tc); + vclock->cc.mult = PTP_VCLOCK_CC_MULT + adj; + spin_unlock_irqrestore(&vclock->lock, flags); + + return 0; +} + +static int ptp_vclock_adjtime(struct ptp_clock_info *ptp, s64 delta) +{ + struct ptp_vclock *vclock = info_to_vclock(ptp); + unsigned long flags; + + spin_lock_irqsave(&vclock->lock, flags); + timecounter_adjtime(&vclock->tc, delta); + spin_unlock_irqrestore(&vclock->lock, flags); + + return 0; +} + +static int ptp_vclock_gettime(struct ptp_clock_info *ptp, + struct timespec64 *ts) +{ + struct ptp_vclock *vclock = info_to_vclock(ptp); + unsigned long flags; + u64 ns; + + spin_lock_irqsave(&vclock->lock, flags); + ns = timecounter_read(&vclock->tc); + spin_unlock_irqrestore(&vclock->lock, flags); + *ts = ns_to_timespec64(ns); + + return 0; +} + +static int ptp_vclock_settime(struct ptp_clock_info *ptp, + const struct timespec64 *ts) +{ + struct ptp_vclock *vclock = info_to_vclock(ptp); + u64 ns = timespec64_to_ns(ts); + unsigned long flags; + + spin_lock_irqsave(&vclock->lock, flags); + timecounter_init(&vclock->tc, &vclock->cc, ns); + spin_unlock_irqrestore(&vclock->lock, flags); + + return 0; +} + +static long ptp_vclock_refresh(struct ptp_clock_info *ptp) +{ + struct ptp_vclock *vclock = info_to_vclock(ptp); + struct timespec64 ts; + + ptp_vclock_gettime(&vclock->info, &ts); + + return PTP_VCLOCK_REFRESH_INTERVAL; +} + +static const struct ptp_clock_info ptp_vclock_info = { + .owner = THIS_MODULE, + .name = "ptp virtual clock", + /* The maximum ppb value that long scaled_ppm can support */ + .max_adj = 32767999, + .adjfine = ptp_vclock_adjfine, + .adjtime = ptp_vclock_adjtime, + .gettime64 = ptp_vclock_gettime, + .settime64 = ptp_vclock_settime, + .do_aux_work = ptp_vclock_refresh, +}; + +static u64 ptp_vclock_read(const struct cyclecounter *cc) +{ + struct ptp_vclock *vclock = cc_to_vclock(cc); + struct ptp_clock *ptp = vclock->pclock; + struct timespec64 ts = {}; + + if (ptp->info->gettimex64) + ptp->info->gettimex64(ptp->info, &ts, NULL); + else + ptp->info->gettime64(ptp->info, &ts); + + return timespec64_to_ns(&ts); +} + +static const struct cyclecounter ptp_vclock_cc = { + .read = ptp_vclock_read, + .mask = CYCLECOUNTER_MASK(32), + .mult = PTP_VCLOCK_CC_MULT, + .shift = PTP_VCLOCK_CC_SHIFT, +}; + +struct ptp_vclock *ptp_vclock_register(struct ptp_clock *pclock) +{ + struct ptp_vclock *vclock; + + vclock = kzalloc(sizeof(*vclock), GFP_KERNEL); + if (!vclock) + return NULL; + + vclock->pclock = pclock; + vclock->info = ptp_vclock_info; + vclock->cc = ptp_vclock_cc; + + snprintf(vclock->info.name, PTP_CLOCK_NAME_LEN, "ptp%d_virt", + pclock->index); + + spin_lock_init(&vclock->lock); + + vclock->clock = ptp_clock_register(&vclock->info, &pclock->dev); + if (IS_ERR_OR_NULL(vclock->clock)) { + kfree(vclock); + return NULL; + } + + timecounter_init(&vclock->tc, &vclock->cc, 0); + ptp_schedule_worker(vclock->clock, PTP_VCLOCK_REFRESH_INTERVAL); + + return vclock; +} + +void ptp_vclock_unregister(struct ptp_vclock *vclock) +{ + ptp_clock_unregister(vclock->clock); + kfree(vclock); +} diff --git a/include/linux/ptp_clock_kernel.h b/include/linux/ptp_clock_kernel.h index aba237c0b3a217..b6fb771ee52472 100644 --- a/include/linux/ptp_clock_kernel.h +++ b/include/linux/ptp_clock_kernel.h @@ -11,7 +11,9 @@ #include #include #include +#include +#define PTP_CLOCK_NAME_LEN 32 /** * struct ptp_clock_request - request PTP clock event * @@ -134,7 +136,7 @@ struct ptp_system_timestamp { struct ptp_clock_info { struct module *owner; - char name[16]; + char name[PTP_CLOCK_NAME_LEN]; s32 max_adj; int n_alarm; int n_ext_ts; From 73f37068d540eba5f93ba3a0019bf479d35ebd76 Mon Sep 17 00:00:00 2001 From: Yangbo Lu Date: Wed, 30 Jun 2021 16:11:53 +0800 Subject: [PATCH 065/714] ptp: support ptp physical/virtual clocks conversion Support ptp physical/virtual clocks conversion via sysfs. There will be a new attribute n_vclocks under ptp physical clock sysfs. - In default, the value is 0 meaning only ptp physical clock is in use. - Setting the value can create corresponding number of ptp virtual clocks to use. But current physical clock is guaranteed to stay free running. - Setting the value back to 0 can delete virtual clocks and back use physical clock again. Another new attribute max_vclocks control the maximum number of ptp vclocks. Signed-off-by: Yangbo Lu Signed-off-by: David S. Miller --- Documentation/ABI/testing/sysfs-ptp | 20 ++++ drivers/ptp/ptp_clock.c | 26 ++++++ drivers/ptp/ptp_private.h | 21 +++++ drivers/ptp/ptp_sysfs.c | 138 ++++++++++++++++++++++++++++ 4 files changed, 205 insertions(+) diff --git a/Documentation/ABI/testing/sysfs-ptp b/Documentation/ABI/testing/sysfs-ptp index 2363ad810ddbee..d378f57c1b73c6 100644 --- a/Documentation/ABI/testing/sysfs-ptp +++ b/Documentation/ABI/testing/sysfs-ptp @@ -33,6 +33,13 @@ Description: frequency adjustment value (a positive integer) in parts per billion. +What: /sys/class/ptp/ptpN/max_vclocks +Date: May 2021 +Contact: Yangbo Lu +Description: + This file contains the maximum number of ptp vclocks. + Write integer to re-configure it. + What: /sys/class/ptp/ptpN/n_alarms Date: September 2010 Contact: Richard Cochran @@ -61,6 +68,19 @@ Description: This file contains the number of programmable pins offered by the PTP hardware clock. +What: /sys/class/ptp/ptpN/n_vclocks +Date: May 2021 +Contact: Yangbo Lu +Description: + This file contains the number of virtual PTP clocks in + use. By default, the value is 0 meaning that only the + physical clock is in use. Setting the value creates + the corresponding number of virtual clocks and causes + the physical clock to become free running. Setting the + value back to 0 deletes the virtual clocks and + switches the physical clock back to normal, adjustable + operation. + What: /sys/class/ptp/ptpN/pins Date: March 2014 Contact: Richard Cochran diff --git a/drivers/ptp/ptp_clock.c b/drivers/ptp/ptp_clock.c index a23a37a4d5dc34..7334f478dde773 100644 --- a/drivers/ptp/ptp_clock.c +++ b/drivers/ptp/ptp_clock.c @@ -76,6 +76,11 @@ static int ptp_clock_settime(struct posix_clock *pc, const struct timespec64 *tp { struct ptp_clock *ptp = container_of(pc, struct ptp_clock, clock); + if (ptp_vclock_in_use(ptp)) { + pr_err("ptp: virtual clock in use\n"); + return -EBUSY; + } + return ptp->info->settime64(ptp->info, tp); } @@ -97,6 +102,11 @@ static int ptp_clock_adjtime(struct posix_clock *pc, struct __kernel_timex *tx) struct ptp_clock_info *ops; int err = -EOPNOTSUPP; + if (ptp_vclock_in_use(ptp)) { + pr_err("ptp: virtual clock in use\n"); + return -EBUSY; + } + ops = ptp->info; if (tx->modes & ADJ_SETOFFSET) { @@ -161,6 +171,7 @@ static void ptp_clock_release(struct device *dev) ptp_cleanup_pin_groups(ptp); mutex_destroy(&ptp->tsevq_mux); mutex_destroy(&ptp->pincfg_mux); + mutex_destroy(&ptp->n_vclocks_mux); ida_simple_remove(&ptp_clocks_map, ptp->index); kfree(ptp); } @@ -208,6 +219,7 @@ struct ptp_clock *ptp_clock_register(struct ptp_clock_info *info, spin_lock_init(&ptp->tsevq.lock); mutex_init(&ptp->tsevq_mux); mutex_init(&ptp->pincfg_mux); + mutex_init(&ptp->n_vclocks_mux); init_waitqueue_head(&ptp->tsev_wq); if (ptp->info->do_aux_work) { @@ -221,6 +233,14 @@ struct ptp_clock *ptp_clock_register(struct ptp_clock_info *info, ptp->pps_source->lookup_cookie = ptp; } + /* PTP virtual clock is being registered under physical clock */ + if (parent->class && parent->class->name && + strcmp(parent->class->name, "ptp") == 0) + ptp->is_virtual_clock = true; + + if (!ptp->is_virtual_clock) + ptp->max_vclocks = PTP_DEFAULT_MAX_VCLOCKS; + err = ptp_populate_pin_groups(ptp); if (err) goto no_pin_groups; @@ -270,6 +290,7 @@ struct ptp_clock *ptp_clock_register(struct ptp_clock_info *info, kworker_err: mutex_destroy(&ptp->tsevq_mux); mutex_destroy(&ptp->pincfg_mux); + mutex_destroy(&ptp->n_vclocks_mux); ida_simple_remove(&ptp_clocks_map, index); no_slot: kfree(ptp); @@ -280,6 +301,11 @@ EXPORT_SYMBOL(ptp_clock_register); int ptp_clock_unregister(struct ptp_clock *ptp) { + if (ptp_vclock_in_use(ptp)) { + pr_err("ptp: virtual clock in use\n"); + return -EBUSY; + } + ptp->defunct = 1; wake_up_interruptible(&ptp->tsev_wq); diff --git a/drivers/ptp/ptp_private.h b/drivers/ptp/ptp_private.h index 853b79b6b30ead..87cb55953b6912 100644 --- a/drivers/ptp/ptp_private.h +++ b/drivers/ptp/ptp_private.h @@ -18,6 +18,7 @@ #define PTP_MAX_TIMESTAMPS 128 #define PTP_BUF_TIMESTAMPS 30 +#define PTP_DEFAULT_MAX_VCLOCKS 20 struct timestamp_event_queue { struct ptp_extts_event buf[PTP_MAX_TIMESTAMPS]; @@ -46,6 +47,10 @@ struct ptp_clock { const struct attribute_group *pin_attr_groups[2]; struct kthread_worker *kworker; struct kthread_delayed_work aux_work; + unsigned int max_vclocks; + unsigned int n_vclocks; + struct mutex n_vclocks_mux; /* protect concurrent n_vclocks access */ + bool is_virtual_clock; }; #define info_to_vclock(d) container_of((d), struct ptp_vclock, info) @@ -74,6 +79,22 @@ static inline int queue_cnt(struct timestamp_event_queue *q) return cnt < 0 ? PTP_MAX_TIMESTAMPS + cnt : cnt; } +/* Check if ptp virtual clock is in use */ +static inline bool ptp_vclock_in_use(struct ptp_clock *ptp) +{ + bool in_use = false; + + if (mutex_lock_interruptible(&ptp->n_vclocks_mux)) + return true; + + if (!ptp->is_virtual_clock && ptp->n_vclocks) + in_use = true; + + mutex_unlock(&ptp->n_vclocks_mux); + + return in_use; +} + /* * see ptp_chardev.c */ diff --git a/drivers/ptp/ptp_sysfs.c b/drivers/ptp/ptp_sysfs.c index be076a91e20e60..0b05041783a54f 100644 --- a/drivers/ptp/ptp_sysfs.c +++ b/drivers/ptp/ptp_sysfs.c @@ -3,6 +3,7 @@ * PTP 1588 clock support - sysfs interface. * * Copyright (C) 2010 OMICRON electronics GmbH + * Copyright 2021 NXP */ #include #include @@ -148,6 +149,137 @@ static ssize_t pps_enable_store(struct device *dev, } static DEVICE_ATTR(pps_enable, 0220, NULL, pps_enable_store); +static int unregister_vclock(struct device *dev, void *data) +{ + struct ptp_clock *ptp = dev_get_drvdata(dev); + struct ptp_clock_info *info = ptp->info; + struct ptp_vclock *vclock; + u8 *num = data; + + vclock = info_to_vclock(info); + dev_info(dev->parent, "delete virtual clock ptp%d\n", + vclock->clock->index); + + ptp_vclock_unregister(vclock); + (*num)--; + + /* For break. Not error. */ + if (*num == 0) + return -EINVAL; + + return 0; +} + +static ssize_t n_vclocks_show(struct device *dev, + struct device_attribute *attr, char *page) +{ + struct ptp_clock *ptp = dev_get_drvdata(dev); + ssize_t size; + + if (mutex_lock_interruptible(&ptp->n_vclocks_mux)) + return -ERESTARTSYS; + + size = snprintf(page, PAGE_SIZE - 1, "%d\n", ptp->n_vclocks); + + mutex_unlock(&ptp->n_vclocks_mux); + + return size; +} + +static ssize_t n_vclocks_store(struct device *dev, + struct device_attribute *attr, + const char *buf, size_t count) +{ + struct ptp_clock *ptp = dev_get_drvdata(dev); + struct ptp_vclock *vclock; + int err = -EINVAL; + u32 num, i; + + if (kstrtou32(buf, 0, &num)) + return err; + + if (mutex_lock_interruptible(&ptp->n_vclocks_mux)) + return -ERESTARTSYS; + + if (num > ptp->max_vclocks) { + dev_err(dev, "max value is %d\n", ptp->max_vclocks); + goto out; + } + + /* Need to create more vclocks */ + if (num > ptp->n_vclocks) { + for (i = 0; i < num - ptp->n_vclocks; i++) { + vclock = ptp_vclock_register(ptp); + if (!vclock) + goto out; + + dev_info(dev, "new virtual clock ptp%d\n", + vclock->clock->index); + } + } + + /* Need to delete vclocks */ + if (num < ptp->n_vclocks) { + i = ptp->n_vclocks - num; + device_for_each_child_reverse(dev, &i, + unregister_vclock); + } + + if (num == 0) + dev_info(dev, "only physical clock in use now\n"); + else + dev_info(dev, "guarantee physical clock free running\n"); + + ptp->n_vclocks = num; + mutex_unlock(&ptp->n_vclocks_mux); + + return count; +out: + mutex_unlock(&ptp->n_vclocks_mux); + return err; +} +static DEVICE_ATTR_RW(n_vclocks); + +static ssize_t max_vclocks_show(struct device *dev, + struct device_attribute *attr, char *page) +{ + struct ptp_clock *ptp = dev_get_drvdata(dev); + ssize_t size; + + size = snprintf(page, PAGE_SIZE - 1, "%d\n", ptp->max_vclocks); + + return size; +} + +static ssize_t max_vclocks_store(struct device *dev, + struct device_attribute *attr, + const char *buf, size_t count) +{ + struct ptp_clock *ptp = dev_get_drvdata(dev); + u32 max; + + if (kstrtou32(buf, 0, &max) || max == 0) + return -EINVAL; + + if (max == ptp->max_vclocks) + return count; + + if (mutex_lock_interruptible(&ptp->n_vclocks_mux)) + return -ERESTARTSYS; + + if (max < ptp->n_vclocks) { + mutex_unlock(&ptp->n_vclocks_mux); + return -EINVAL; + } + + ptp->max_vclocks = max; + + mutex_unlock(&ptp->n_vclocks_mux); + + return count; +} +static DEVICE_ATTR_RW(max_vclocks); + static struct attribute *ptp_attrs[] = { &dev_attr_clock_name.attr, @@ -162,6 +294,8 @@ static struct attribute *ptp_attrs[] = { &dev_attr_fifo.attr, &dev_attr_period.attr, &dev_attr_pps_enable.attr, + &dev_attr_n_vclocks.attr, + &dev_attr_max_vclocks.attr, NULL }; @@ -183,6 +317,10 @@ static umode_t ptp_is_attribute_visible(struct kobject *kobj, } else if (attr == &dev_attr_pps_enable.attr) { if (!info->pps) mode = 0; + } else if (attr == &dev_attr_n_vclocks.attr || + attr == &dev_attr_max_vclocks.attr) { + if (ptp->is_virtual_clock) + mode = 0; } return mode; From 44c494c8e30e35713c7d11ca3c5ab332cbfabacf Mon Sep 17 00:00:00 2001 From: Yangbo Lu Date: Wed, 30 Jun 2021 16:11:54 +0800 Subject: [PATCH 066/714] ptp: track available ptp vclocks information Track available ptp vclocks information. Record index values of available ptp vclocks during registering and unregistering. This is preparation for supporting ptp vclocks info query through ethtool. Signed-off-by: Yangbo Lu Signed-off-by: David S. Miller --- drivers/ptp/ptp_clock.c | 15 ++++++++++++++- drivers/ptp/ptp_private.h | 1 + drivers/ptp/ptp_sysfs.c | 28 +++++++++++++++++++++++++--- 3 files changed, 40 insertions(+), 4 deletions(-) diff --git a/drivers/ptp/ptp_clock.c b/drivers/ptp/ptp_clock.c index 7334f478dde773..9205a9362a9dd4 100644 --- a/drivers/ptp/ptp_clock.c +++ b/drivers/ptp/ptp_clock.c @@ -196,6 +196,7 @@ struct ptp_clock *ptp_clock_register(struct ptp_clock_info *info, { struct ptp_clock *ptp; int err = 0, index, major = MAJOR(ptp_devt); + size_t size; if (info->n_alarm > PTP_MAX_ALARMS) return ERR_PTR(-EINVAL); @@ -238,9 +239,17 @@ struct ptp_clock *ptp_clock_register(struct ptp_clock_info *info, strcmp(parent->class->name, "ptp") == 0) ptp->is_virtual_clock = true; - if (!ptp->is_virtual_clock) + if (!ptp->is_virtual_clock) { ptp->max_vclocks = PTP_DEFAULT_MAX_VCLOCKS; + size = sizeof(int) * ptp->max_vclocks; + ptp->vclock_index = kzalloc(size, GFP_KERNEL); + if (!ptp->vclock_index) { + err = -ENOMEM; + goto no_mem_for_vclocks; + } + } + err = ptp_populate_pin_groups(ptp); if (err) goto no_pin_groups; @@ -285,6 +294,8 @@ struct ptp_clock *ptp_clock_register(struct ptp_clock_info *info, no_pps: ptp_cleanup_pin_groups(ptp); no_pin_groups: + kfree(ptp->vclock_index); +no_mem_for_vclocks: if (ptp->kworker) kthread_destroy_worker(ptp->kworker); kworker_err: @@ -309,6 +320,8 @@ int ptp_clock_unregister(struct ptp_clock *ptp) ptp->defunct = 1; wake_up_interruptible(&ptp->tsev_wq); + kfree(ptp->vclock_index); + if (ptp->kworker) { kthread_cancel_delayed_work_sync(&ptp->aux_work); kthread_destroy_worker(ptp->kworker); diff --git a/drivers/ptp/ptp_private.h b/drivers/ptp/ptp_private.h index 87cb55953b6912..f75fadd9b2443b 100644 --- a/drivers/ptp/ptp_private.h +++ b/drivers/ptp/ptp_private.h @@ -49,6 +49,7 @@ struct ptp_clock { struct kthread_delayed_work aux_work; unsigned int max_vclocks; unsigned int n_vclocks; + int *vclock_index; struct mutex n_vclocks_mux; /* protect concurrent n_vclocks access */ bool is_virtual_clock; }; diff --git a/drivers/ptp/ptp_sysfs.c b/drivers/ptp/ptp_sysfs.c index 0b05041783a54f..6a36590ca77a5d 100644 --- a/drivers/ptp/ptp_sysfs.c +++ b/drivers/ptp/ptp_sysfs.c @@ -213,6 +213,9 @@ static ssize_t n_vclocks_store(struct device *dev, if (!vclock) goto out; + *(ptp->vclock_index + ptp->n_vclocks + i) = + vclock->clock->index; + dev_info(dev, "new virtual clock ptp%d\n", vclock->clock->index); } @@ -223,6 +226,9 @@ static ssize_t n_vclocks_store(struct device *dev, i = ptp->n_vclocks - num; device_for_each_child_reverse(dev, &i, unregister_vclock); + + for (i = 1; i <= ptp->n_vclocks - num; i++) + *(ptp->vclock_index + ptp->n_vclocks - i) = -1; } if (num == 0) @@ -256,6 +262,9 @@ static ssize_t max_vclocks_store(struct device *dev, const char *buf, size_t count) { struct ptp_clock *ptp = dev_get_drvdata(dev); + unsigned int *vclock_index; + int err = -EINVAL; + size_t size; u32 max; if (kstrtou32(buf, 0, &max) || max == 0) @@ -267,16 +276,29 @@ static ssize_t max_vclocks_store(struct device *dev, if (mutex_lock_interruptible(&ptp->n_vclocks_mux)) return -ERESTARTSYS; - if (max < ptp->n_vclocks) { - mutex_unlock(&ptp->n_vclocks_mux); - return -EINVAL; + if (max < ptp->n_vclocks) + goto out; + + size = sizeof(int) * max; + vclock_index = kzalloc(size, GFP_KERNEL); + if (!vclock_index) { + err = -ENOMEM; + goto out; } + size = sizeof(int) * ptp->n_vclocks; + memcpy(vclock_index, ptp->vclock_index, size); + + kfree(ptp->vclock_index); + ptp->vclock_index = vclock_index; ptp->max_vclocks = max; mutex_unlock(&ptp->n_vclocks_mux); return count; +out: + mutex_unlock(&ptp->n_vclocks_mux); + return err; } static DEVICE_ATTR_RW(max_vclocks); From acb288e8047b7569fbc9af6fa6e9405315345103 Mon Sep 17 00:00:00 2001 From: Yangbo Lu Date: Wed, 30 Jun 2021 16:11:55 +0800 Subject: [PATCH 067/714] ptp: add kernel API ptp_get_vclocks_index() Add kernel API ptp_get_vclocks_index() to get all ptp vclocks index on pclock. This is preparation for supporting ptp vclocks info query through ethtool. Signed-off-by: Yangbo Lu Signed-off-by: David S. Miller --- drivers/ptp/ptp_clock.c | 3 ++- drivers/ptp/ptp_private.h | 2 ++ drivers/ptp/ptp_vclock.c | 35 ++++++++++++++++++++++++++++++++ include/linux/ptp_clock_kernel.h | 14 +++++++++++++ 4 files changed, 53 insertions(+), 1 deletion(-) diff --git a/drivers/ptp/ptp_clock.c b/drivers/ptp/ptp_clock.c index 9205a9362a9dd4..f012fa581cf412 100644 --- a/drivers/ptp/ptp_clock.c +++ b/drivers/ptp/ptp_clock.c @@ -24,10 +24,11 @@ #define PTP_PPS_EVENT PPS_CAPTUREASSERT #define PTP_PPS_MODE (PTP_PPS_DEFAULTS | PPS_CANWAIT | PPS_TSFMT_TSPEC) +struct class *ptp_class; + /* private globals */ static dev_t ptp_devt; -static struct class *ptp_class; static DEFINE_IDA(ptp_clocks_map); diff --git a/drivers/ptp/ptp_private.h b/drivers/ptp/ptp_private.h index f75fadd9b2443b..dba6be47706700 100644 --- a/drivers/ptp/ptp_private.h +++ b/drivers/ptp/ptp_private.h @@ -96,6 +96,8 @@ static inline bool ptp_vclock_in_use(struct ptp_clock *ptp) return in_use; } +extern struct class *ptp_class; + /* * see ptp_chardev.c */ diff --git a/drivers/ptp/ptp_vclock.c b/drivers/ptp/ptp_vclock.c index fc9205cc504d37..cefab29a059217 100644 --- a/drivers/ptp/ptp_vclock.c +++ b/drivers/ptp/ptp_vclock.c @@ -148,3 +148,38 @@ void ptp_vclock_unregister(struct ptp_vclock *vclock) ptp_clock_unregister(vclock->clock); kfree(vclock); } + +int ptp_get_vclocks_index(int pclock_index, int **vclock_index) +{ + char name[PTP_CLOCK_NAME_LEN] = ""; + struct ptp_clock *ptp; + struct device *dev; + int num = 0; + + if (pclock_index < 0) + return num; + + snprintf(name, PTP_CLOCK_NAME_LEN, "ptp%d", pclock_index); + dev = class_find_device_by_name(ptp_class, name); + if (!dev) + return num; + + ptp = dev_get_drvdata(dev); + + if (mutex_lock_interruptible(&ptp->n_vclocks_mux)) { + put_device(dev); + return num; + } + + *vclock_index = kzalloc(sizeof(int) * ptp->n_vclocks, GFP_KERNEL); + if (!(*vclock_index)) + goto out; + + memcpy(*vclock_index, ptp->vclock_index, sizeof(int) * ptp->n_vclocks); + num = ptp->n_vclocks; +out: + mutex_unlock(&ptp->n_vclocks_mux); + put_device(dev); + return num; +} +EXPORT_SYMBOL(ptp_get_vclocks_index); diff --git a/include/linux/ptp_clock_kernel.h b/include/linux/ptp_clock_kernel.h index b6fb771ee52472..300a984fec8772 100644 --- a/include/linux/ptp_clock_kernel.h +++ b/include/linux/ptp_clock_kernel.h @@ -306,6 +306,18 @@ int ptp_schedule_worker(struct ptp_clock *ptp, unsigned long delay); */ void ptp_cancel_worker_sync(struct ptp_clock *ptp); +/** + * ptp_get_vclocks_index() - get all vclocks index on pclock, and + * caller is responsible to free memory + * of vclock_index + * + * @pclock_index: phc index of ptp pclock. + * @vclock_index: pointer to pointer of vclock index. + * + * return number of vclocks. + */ +int ptp_get_vclocks_index(int pclock_index, int **vclock_index); + #else static inline struct ptp_clock *ptp_clock_register(struct ptp_clock_info *info, struct device *parent) @@ -325,6 +337,8 @@ static inline int ptp_schedule_worker(struct ptp_clock *ptp, { return -EOPNOTSUPP; } static inline void ptp_cancel_worker_sync(struct ptp_clock *ptp) { } +static inline int ptp_get_vclocks_index(int pclock_index, int **vclock_index) +{ return 0; } #endif From c156174a67070042d51d2c866146d3c934d5468c Mon Sep 17 00:00:00 2001 From: Yangbo Lu Date: Wed, 30 Jun 2021 16:11:56 +0800 Subject: [PATCH 068/714] ethtool: add a new command for getting PHC virtual clocks Add an interface for getting PHC (PTP Hardware Clock) virtual clocks, which are based on PHC physical clock providing hardware timestamp to network packets. Signed-off-by: Yangbo Lu Signed-off-by: David S. Miller --- Documentation/networking/ethtool-netlink.rst | 22 +++++ include/linux/ethtool.h | 10 +++ include/uapi/linux/ethtool_netlink.h | 15 ++++ net/ethtool/Makefile | 2 +- net/ethtool/common.c | 13 +++ net/ethtool/netlink.c | 10 +++ net/ethtool/netlink.h | 2 + net/ethtool/phc_vclocks.c | 94 ++++++++++++++++++++ 8 files changed, 167 insertions(+), 1 deletion(-) create mode 100644 net/ethtool/phc_vclocks.c diff --git a/Documentation/networking/ethtool-netlink.rst b/Documentation/networking/ethtool-netlink.rst index 6ea91e41593f7c..c86628e6a23562 100644 --- a/Documentation/networking/ethtool-netlink.rst +++ b/Documentation/networking/ethtool-netlink.rst @@ -212,6 +212,7 @@ Userspace to kernel: ``ETHTOOL_MSG_FEC_SET`` set FEC settings ``ETHTOOL_MSG_MODULE_EEPROM_GET`` read SFP module EEPROM ``ETHTOOL_MSG_STATS_GET`` get standard statistics + ``ETHTOOL_MSG_PHC_VCLOCKS_GET`` get PHC virtual clocks info ===================================== ================================ Kernel to userspace: @@ -250,6 +251,7 @@ Kernel to userspace: ``ETHTOOL_MSG_FEC_NTF`` FEC settings ``ETHTOOL_MSG_MODULE_EEPROM_GET_REPLY`` read SFP module EEPROM ``ETHTOOL_MSG_STATS_GET_REPLY`` standard statistics + ``ETHTOOL_MSG_PHC_VCLOCKS_GET_REPLY`` PHC virtual clocks info ======================================== ================================= ``GET`` requests are sent by userspace applications to retrieve device @@ -1477,6 +1479,25 @@ Low and high bounds are inclusive, for example: etherStatsPkts512to1023Octets 512 1023 ============================= ==== ==== +PHC_VCLOCKS_GET +=============== + +Query device PHC virtual clocks information. + +Request contents: + + ==================================== ====== ========================== + ``ETHTOOL_A_PHC_VCLOCKS_HEADER`` nested request header + ==================================== ====== ========================== + +Kernel response contents: + + ==================================== ====== ========================== + ``ETHTOOL_A_PHC_VCLOCKS_HEADER`` nested reply header + ``ETHTOOL_A_PHC_VCLOCKS_NUM`` u32 PHC virtual clocks number + ``ETHTOOL_A_PHC_VCLOCKS_INDEX`` s32 PHC index array + ==================================== ====== ========================== + Request translation =================== @@ -1575,4 +1596,5 @@ are netlink only. n/a ``ETHTOOL_MSG_CABLE_TEST_ACT`` n/a ``ETHTOOL_MSG_CABLE_TEST_TDR_ACT`` n/a ``ETHTOOL_MSG_TUNNEL_INFO_GET`` + n/a ``ETHTOOL_MSG_PHC_VCLOCKS_GET`` =================================== ===================================== diff --git a/include/linux/ethtool.h b/include/linux/ethtool.h index 29dbb603bc915e..232daaec56e44b 100644 --- a/include/linux/ethtool.h +++ b/include/linux/ethtool.h @@ -757,6 +757,16 @@ void ethtool_params_from_link_mode(struct ethtool_link_ksettings *link_ksettings, enum ethtool_link_mode_bit_indices link_mode); +/** + * ethtool_get_phc_vclocks - Derive phc vclocks information, and caller + * is responsible to free memory of vclock_index + * @dev: pointer to net_device structure + * @vclock_index: pointer to pointer of vclock index + * + * Return number of phc vclocks + */ +int ethtool_get_phc_vclocks(struct net_device *dev, int **vclock_index); + /** * ethtool_sprintf - Write formatted string to ethtool string data * @data: Pointer to start of string to update diff --git a/include/uapi/linux/ethtool_netlink.h b/include/uapi/linux/ethtool_netlink.h index c7135c9c37a5f1..b3b93710eff70b 100644 --- a/include/uapi/linux/ethtool_netlink.h +++ b/include/uapi/linux/ethtool_netlink.h @@ -46,6 +46,7 @@ enum { ETHTOOL_MSG_FEC_SET, ETHTOOL_MSG_MODULE_EEPROM_GET, ETHTOOL_MSG_STATS_GET, + ETHTOOL_MSG_PHC_VCLOCKS_GET, /* add new constants above here */ __ETHTOOL_MSG_USER_CNT, @@ -88,6 +89,7 @@ enum { ETHTOOL_MSG_FEC_NTF, ETHTOOL_MSG_MODULE_EEPROM_GET_REPLY, ETHTOOL_MSG_STATS_GET_REPLY, + ETHTOOL_MSG_PHC_VCLOCKS_GET_REPLY, /* add new constants above here */ __ETHTOOL_MSG_KERNEL_CNT, @@ -440,6 +442,19 @@ enum { ETHTOOL_A_TSINFO_MAX = (__ETHTOOL_A_TSINFO_CNT - 1) }; +/* PHC VCLOCKS */ + +enum { + ETHTOOL_A_PHC_VCLOCKS_UNSPEC, + ETHTOOL_A_PHC_VCLOCKS_HEADER, /* nest - _A_HEADER_* */ + ETHTOOL_A_PHC_VCLOCKS_NUM, /* u32 */ + ETHTOOL_A_PHC_VCLOCKS_INDEX, /* array, s32 */ + + /* add new constants above here */ + __ETHTOOL_A_PHC_VCLOCKS_CNT, + ETHTOOL_A_PHC_VCLOCKS_MAX = (__ETHTOOL_A_PHC_VCLOCKS_CNT - 1) +}; + /* CABLE TEST */ enum { diff --git a/net/ethtool/Makefile b/net/ethtool/Makefile index 723c9a8a8cdf10..0a19470efbfb9c 100644 --- a/net/ethtool/Makefile +++ b/net/ethtool/Makefile @@ -7,4 +7,4 @@ obj-$(CONFIG_ETHTOOL_NETLINK) += ethtool_nl.o ethtool_nl-y := netlink.o bitset.o strset.o linkinfo.o linkmodes.o \ linkstate.o debug.o wol.o features.o privflags.o rings.o \ channels.o coalesce.o pause.o eee.o tsinfo.o cabletest.o \ - tunnels.o fec.o eeprom.o stats.o + tunnels.o fec.o eeprom.o stats.o phc_vclocks.o diff --git a/net/ethtool/common.c b/net/ethtool/common.c index f9dcbad84788b5..798231b0767686 100644 --- a/net/ethtool/common.c +++ b/net/ethtool/common.c @@ -4,6 +4,7 @@ #include #include #include +#include #include "common.h" @@ -554,6 +555,18 @@ int __ethtool_get_ts_info(struct net_device *dev, struct ethtool_ts_info *info) return 0; } +int ethtool_get_phc_vclocks(struct net_device *dev, int **vclock_index) +{ + struct ethtool_ts_info info = { }; + int num = 0; + + if (!__ethtool_get_ts_info(dev, &info)) + num = ptp_get_vclocks_index(info.phc_index, vclock_index); + + return num; +} +EXPORT_SYMBOL(ethtool_get_phc_vclocks); + const struct ethtool_phy_ops *ethtool_phy_ops; void ethtool_set_ethtool_phy_ops(const struct ethtool_phy_ops *ops) diff --git a/net/ethtool/netlink.c b/net/ethtool/netlink.c index a7346346114f4a..73e0f5b626bfba 100644 --- a/net/ethtool/netlink.c +++ b/net/ethtool/netlink.c @@ -248,6 +248,7 @@ ethnl_default_requests[__ETHTOOL_MSG_USER_CNT] = { [ETHTOOL_MSG_TSINFO_GET] = ðnl_tsinfo_request_ops, [ETHTOOL_MSG_MODULE_EEPROM_GET] = ðnl_module_eeprom_request_ops, [ETHTOOL_MSG_STATS_GET] = ðnl_stats_request_ops, + [ETHTOOL_MSG_PHC_VCLOCKS_GET] = ðnl_phc_vclocks_request_ops, }; static struct ethnl_dump_ctx *ethnl_dump_context(struct netlink_callback *cb) @@ -958,6 +959,15 @@ static const struct genl_ops ethtool_genl_ops[] = { .policy = ethnl_stats_get_policy, .maxattr = ARRAY_SIZE(ethnl_stats_get_policy) - 1, }, + { + .cmd = ETHTOOL_MSG_PHC_VCLOCKS_GET, + .doit = ethnl_default_doit, + .start = ethnl_default_start, + .dumpit = ethnl_default_dumpit, + .done = ethnl_default_done, + .policy = ethnl_phc_vclocks_get_policy, + .maxattr = ARRAY_SIZE(ethnl_phc_vclocks_get_policy) - 1, + }, }; static const struct genl_multicast_group ethtool_nl_mcgrps[] = { diff --git a/net/ethtool/netlink.h b/net/ethtool/netlink.h index 3e25a47fd482d8..3fc395c8670219 100644 --- a/net/ethtool/netlink.h +++ b/net/ethtool/netlink.h @@ -347,6 +347,7 @@ extern const struct ethnl_request_ops ethnl_tsinfo_request_ops; extern const struct ethnl_request_ops ethnl_fec_request_ops; extern const struct ethnl_request_ops ethnl_module_eeprom_request_ops; extern const struct ethnl_request_ops ethnl_stats_request_ops; +extern const struct ethnl_request_ops ethnl_phc_vclocks_request_ops; extern const struct nla_policy ethnl_header_policy[ETHTOOL_A_HEADER_FLAGS + 1]; extern const struct nla_policy ethnl_header_policy_stats[ETHTOOL_A_HEADER_FLAGS + 1]; @@ -382,6 +383,7 @@ extern const struct nla_policy ethnl_fec_get_policy[ETHTOOL_A_FEC_HEADER + 1]; extern const struct nla_policy ethnl_fec_set_policy[ETHTOOL_A_FEC_AUTO + 1]; extern const struct nla_policy ethnl_module_eeprom_get_policy[ETHTOOL_A_MODULE_EEPROM_I2C_ADDRESS + 1]; extern const struct nla_policy ethnl_stats_get_policy[ETHTOOL_A_STATS_GROUPS + 1]; +extern const struct nla_policy ethnl_phc_vclocks_get_policy[ETHTOOL_A_PHC_VCLOCKS_HEADER + 1]; int ethnl_set_linkinfo(struct sk_buff *skb, struct genl_info *info); int ethnl_set_linkmodes(struct sk_buff *skb, struct genl_info *info); diff --git a/net/ethtool/phc_vclocks.c b/net/ethtool/phc_vclocks.c new file mode 100644 index 00000000000000..637b2f5297d524 --- /dev/null +++ b/net/ethtool/phc_vclocks.c @@ -0,0 +1,94 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * Copyright 2021 NXP + */ +#include "netlink.h" +#include "common.h" + +struct phc_vclocks_req_info { + struct ethnl_req_info base; +}; + +struct phc_vclocks_reply_data { + struct ethnl_reply_data base; + int num; + int *index; +}; + +#define PHC_VCLOCKS_REPDATA(__reply_base) \ + container_of(__reply_base, struct phc_vclocks_reply_data, base) + +const struct nla_policy ethnl_phc_vclocks_get_policy[] = { + [ETHTOOL_A_PHC_VCLOCKS_HEADER] = NLA_POLICY_NESTED(ethnl_header_policy), +}; + +static int phc_vclocks_prepare_data(const struct ethnl_req_info *req_base, + struct ethnl_reply_data *reply_base, + struct genl_info *info) +{ + struct phc_vclocks_reply_data *data = PHC_VCLOCKS_REPDATA(reply_base); + struct net_device *dev = reply_base->dev; + int ret; + + ret = ethnl_ops_begin(dev); + if (ret < 0) + return ret; + data->num = ethtool_get_phc_vclocks(dev, &data->index); + ethnl_ops_complete(dev); + + return ret; +} + +static int phc_vclocks_reply_size(const struct ethnl_req_info *req_base, + const struct ethnl_reply_data *reply_base) +{ + const struct phc_vclocks_reply_data *data = + PHC_VCLOCKS_REPDATA(reply_base); + int len = 0; + + if (data->num > 0) { + len += nla_total_size(sizeof(u32)); + len += nla_total_size(sizeof(s32) * data->num); + } + + return len; +} + +static int phc_vclocks_fill_reply(struct sk_buff *skb, + const struct ethnl_req_info *req_base, + const struct ethnl_reply_data *reply_base) +{ + const struct phc_vclocks_reply_data *data = + PHC_VCLOCKS_REPDATA(reply_base); + + if (data->num <= 0) + return 0; + + if (nla_put_u32(skb, ETHTOOL_A_PHC_VCLOCKS_NUM, data->num) || + nla_put(skb, ETHTOOL_A_PHC_VCLOCKS_INDEX, + sizeof(s32) * data->num, data->index)) + return -EMSGSIZE; + + return 0; +} + +static void phc_vclocks_cleanup_data(struct ethnl_reply_data *reply_base) +{ + const struct phc_vclocks_reply_data *data = + PHC_VCLOCKS_REPDATA(reply_base); + + kfree(data->index); +} + +const struct ethnl_request_ops ethnl_phc_vclocks_request_ops = { + .request_cmd = ETHTOOL_MSG_PHC_VCLOCKS_GET, + .reply_cmd = ETHTOOL_MSG_PHC_VCLOCKS_GET_REPLY, + .hdr_attr = ETHTOOL_A_PHC_VCLOCKS_HEADER, + .req_info_size = sizeof(struct phc_vclocks_req_info), + .reply_data_size = sizeof(struct phc_vclocks_reply_data), + + .prepare_data = phc_vclocks_prepare_data, + .reply_size = phc_vclocks_reply_size, + .fill_reply = phc_vclocks_fill_reply, + .cleanup_data = phc_vclocks_cleanup_data, +}; From 895487a3a10fb3a177e20dcde875515d46ccd4df Mon Sep 17 00:00:00 2001 From: Yangbo Lu Date: Wed, 30 Jun 2021 16:11:57 +0800 Subject: [PATCH 069/714] ptp: add kernel API ptp_convert_timestamp() Add kernel API ptp_convert_timestamp() to convert raw hardware timestamp to a specified ptp vclock time. Signed-off-by: Yangbo Lu Signed-off-by: David S. Miller --- drivers/ptp/ptp_vclock.c | 34 ++++++++++++++++++++++++++++++++ include/linux/ptp_clock_kernel.h | 13 ++++++++++++ 2 files changed, 47 insertions(+) diff --git a/drivers/ptp/ptp_vclock.c b/drivers/ptp/ptp_vclock.c index cefab29a059217..e0f87c57749a33 100644 --- a/drivers/ptp/ptp_vclock.c +++ b/drivers/ptp/ptp_vclock.c @@ -183,3 +183,37 @@ int ptp_get_vclocks_index(int pclock_index, int **vclock_index) return num; } EXPORT_SYMBOL(ptp_get_vclocks_index); + +void ptp_convert_timestamp(struct skb_shared_hwtstamps *hwtstamps, + int vclock_index) +{ + char name[PTP_CLOCK_NAME_LEN] = ""; + struct ptp_vclock *vclock; + struct ptp_clock *ptp; + unsigned long flags; + struct device *dev; + u64 ns; + + snprintf(name, PTP_CLOCK_NAME_LEN, "ptp%d", vclock_index); + dev = class_find_device_by_name(ptp_class, name); + if (!dev) + return; + + ptp = dev_get_drvdata(dev); + if (!ptp->is_virtual_clock) { + put_device(dev); + return; + } + + vclock = info_to_vclock(ptp->info); + + ns = ktime_to_ns(hwtstamps->hwtstamp); + + spin_lock_irqsave(&vclock->lock, flags); + ns = timecounter_cyc2time(&vclock->tc, ns); + spin_unlock_irqrestore(&vclock->lock, flags); + + put_device(dev); + hwtstamps->hwtstamp = ns_to_ktime(ns); +} +EXPORT_SYMBOL(ptp_convert_timestamp); diff --git a/include/linux/ptp_clock_kernel.h b/include/linux/ptp_clock_kernel.h index 300a984fec8772..71fac923772582 100644 --- a/include/linux/ptp_clock_kernel.h +++ b/include/linux/ptp_clock_kernel.h @@ -12,6 +12,7 @@ #include #include #include +#include #define PTP_CLOCK_NAME_LEN 32 /** @@ -318,6 +319,15 @@ void ptp_cancel_worker_sync(struct ptp_clock *ptp); */ int ptp_get_vclocks_index(int pclock_index, int **vclock_index); +/** + * ptp_convert_timestamp() - convert timestamp to a ptp vclock time + * + * @hwtstamps: skb_shared_hwtstamps structure pointer + * @vclock_index: phc index of ptp vclock. + */ +void ptp_convert_timestamp(struct skb_shared_hwtstamps *hwtstamps, + int vclock_index); + #else static inline struct ptp_clock *ptp_clock_register(struct ptp_clock_info *info, struct device *parent) @@ -339,6 +349,9 @@ static inline void ptp_cancel_worker_sync(struct ptp_clock *ptp) { } static inline int ptp_get_vclocks_index(int pclock_index, int **vclock_index) { return 0; } +static inline void ptp_convert_timestamp(struct skb_shared_hwtstamps *hwtstamps, + int vclock_index) +{ } #endif From 6c9a0a0f2333b1e3c29fef47a8b12131fce4905b Mon Sep 17 00:00:00 2001 From: Yangbo Lu Date: Wed, 30 Jun 2021 16:11:58 +0800 Subject: [PATCH 070/714] mptcp: setsockopt: convert to mptcp_setsockopt_sol_socket_timestamping() Split timestamping handling into a new function mptcp_setsockopt_sol_socket_timestamping(). This is preparation for extending SO_TIMESTAMPING for PHC binding, since optval will no longer be integer. Signed-off-by: Yangbo Lu Signed-off-by: David S. Miller --- net/mptcp/sockopt.c | 57 +++++++++++++++++++++++++++++++-------------- 1 file changed, 40 insertions(+), 17 deletions(-) diff --git a/net/mptcp/sockopt.c b/net/mptcp/sockopt.c index 092d1f635d2774..ea38cbcd2ad4cb 100644 --- a/net/mptcp/sockopt.c +++ b/net/mptcp/sockopt.c @@ -157,19 +157,7 @@ static int mptcp_setsockopt_sol_socket_tstamp(struct mptcp_sock *msk, int optnam struct sock *ssk = mptcp_subflow_tcp_sock(subflow); bool slow = lock_sock_fast(ssk); - switch (optname) { - case SO_TIMESTAMP_OLD: - case SO_TIMESTAMP_NEW: - case SO_TIMESTAMPNS_OLD: - case SO_TIMESTAMPNS_NEW: - sock_set_timestamp(sk, optname, !!val); - break; - case SO_TIMESTAMPING_NEW: - case SO_TIMESTAMPING_OLD: - sock_set_timestamping(sk, optname, val); - break; - } - + sock_set_timestamp(sk, optname, !!val); unlock_sock_fast(ssk, slow); } @@ -178,7 +166,8 @@ static int mptcp_setsockopt_sol_socket_tstamp(struct mptcp_sock *msk, int optnam } static int mptcp_setsockopt_sol_socket_int(struct mptcp_sock *msk, int optname, - sockptr_t optval, unsigned int optlen) + sockptr_t optval, + unsigned int optlen) { int val, ret; @@ -205,14 +194,45 @@ static int mptcp_setsockopt_sol_socket_int(struct mptcp_sock *msk, int optname, case SO_TIMESTAMP_NEW: case SO_TIMESTAMPNS_OLD: case SO_TIMESTAMPNS_NEW: - case SO_TIMESTAMPING_OLD: - case SO_TIMESTAMPING_NEW: return mptcp_setsockopt_sol_socket_tstamp(msk, optname, val); } return -ENOPROTOOPT; } +static int mptcp_setsockopt_sol_socket_timestamping(struct mptcp_sock *msk, + int optname, + sockptr_t optval, + unsigned int optlen) +{ + struct mptcp_subflow_context *subflow; + struct sock *sk = (struct sock *)msk; + int val, ret; + + ret = mptcp_get_int_option(msk, optval, optlen, &val); + if (ret) + return ret; + + ret = sock_setsockopt(sk->sk_socket, SOL_SOCKET, optname, + KERNEL_SOCKPTR(&val), sizeof(val)); + if (ret) + return ret; + + lock_sock(sk); + + mptcp_for_each_subflow(msk, subflow) { + struct sock *ssk = mptcp_subflow_tcp_sock(subflow); + bool slow = lock_sock_fast(ssk); + + sock_set_timestamping(sk, optname, val); + unlock_sock_fast(ssk, slow); + } + + release_sock(sk); + + return 0; +} + static int mptcp_setsockopt_sol_socket_linger(struct mptcp_sock *msk, sockptr_t optval, unsigned int optlen) { @@ -299,9 +319,12 @@ static int mptcp_setsockopt_sol_socket(struct mptcp_sock *msk, int optname, case SO_TIMESTAMP_NEW: case SO_TIMESTAMPNS_OLD: case SO_TIMESTAMPNS_NEW: + return mptcp_setsockopt_sol_socket_int(msk, optname, optval, + optlen); case SO_TIMESTAMPING_OLD: case SO_TIMESTAMPING_NEW: - return mptcp_setsockopt_sol_socket_int(msk, optname, optval, optlen); + return mptcp_setsockopt_sol_socket_timestamping(msk, optname, + optval, optlen); case SO_LINGER: return mptcp_setsockopt_sol_socket_linger(msk, optval, optlen); case SO_RCVLOWAT: From d463126e23f112629edb01594141ca437a92a108 Mon Sep 17 00:00:00 2001 From: Yangbo Lu Date: Wed, 30 Jun 2021 16:11:59 +0800 Subject: [PATCH 071/714] net: sock: extend SO_TIMESTAMPING for PHC binding Since PTP virtual clock support is added, there can be several PTP virtual clocks based on one PTP physical clock for timestamping. This patch is to extend SO_TIMESTAMPING API to support PHC (PTP Hardware Clock) binding by adding a new flag SOF_TIMESTAMPING_BIND_PHC. When PTP virtual clocks are in use, user space can configure to bind one for timestamping, but PTP physical clock is not supported and not needed to bind. This patch is preparation for timestamp conversion from raw timestamp to a specific PTP virtual clock time in core net. Signed-off-by: Yangbo Lu Signed-off-by: David S. Miller --- include/net/sock.h | 8 +++- include/uapi/linux/net_tstamp.h | 17 ++++++++- net/core/sock.c | 65 +++++++++++++++++++++++++++++++-- net/ethtool/common.c | 1 + net/mptcp/sockopt.c | 23 +++++++++--- 5 files changed, 101 insertions(+), 13 deletions(-) diff --git a/include/net/sock.h b/include/net/sock.h index 8bdd80027ffbd1..f23cb259b0e247 100644 --- a/include/net/sock.h +++ b/include/net/sock.h @@ -316,7 +316,9 @@ struct bpf_local_storage; * @sk_timer: sock cleanup timer * @sk_stamp: time stamp of last packet received * @sk_stamp_seq: lock for accessing sk_stamp on 32 bit architectures only - * @sk_tsflags: SO_TIMESTAMPING socket options + * @sk_tsflags: SO_TIMESTAMPING flags + * @sk_bind_phc: SO_TIMESTAMPING bind PHC index of PTP virtual clock + * for timestamping * @sk_tskey: counter to disambiguate concurrent tstamp requests * @sk_zckey: counter to order MSG_ZEROCOPY notifications * @sk_socket: Identd and reporting IO signals @@ -493,6 +495,7 @@ struct sock { seqlock_t sk_stamp_seq; #endif u16 sk_tsflags; + int sk_bind_phc; u8 sk_shutdown; u32 sk_tskey; atomic_t sk_zckey; @@ -2755,7 +2758,8 @@ void sock_def_readable(struct sock *sk); int sock_bindtoindex(struct sock *sk, int ifindex, bool lock_sk); void sock_set_timestamp(struct sock *sk, int optname, bool valbool); -int sock_set_timestamping(struct sock *sk, int optname, int val); +int sock_set_timestamping(struct sock *sk, int optname, + struct so_timestamping timestamping); void sock_enable_timestamps(struct sock *sk); void sock_no_linger(struct sock *sk); diff --git a/include/uapi/linux/net_tstamp.h b/include/uapi/linux/net_tstamp.h index 7ed0b3d1c00a1d..fcc61c73a6668c 100644 --- a/include/uapi/linux/net_tstamp.h +++ b/include/uapi/linux/net_tstamp.h @@ -13,7 +13,7 @@ #include #include /* for SO_TIMESTAMPING */ -/* SO_TIMESTAMPING gets an integer bit field comprised of these values */ +/* SO_TIMESTAMPING flags */ enum { SOF_TIMESTAMPING_TX_HARDWARE = (1<<0), SOF_TIMESTAMPING_TX_SOFTWARE = (1<<1), @@ -30,8 +30,9 @@ enum { SOF_TIMESTAMPING_OPT_STATS = (1<<12), SOF_TIMESTAMPING_OPT_PKTINFO = (1<<13), SOF_TIMESTAMPING_OPT_TX_SWHW = (1<<14), + SOF_TIMESTAMPING_BIND_PHC = (1 << 15), - SOF_TIMESTAMPING_LAST = SOF_TIMESTAMPING_OPT_TX_SWHW, + SOF_TIMESTAMPING_LAST = SOF_TIMESTAMPING_BIND_PHC, SOF_TIMESTAMPING_MASK = (SOF_TIMESTAMPING_LAST - 1) | SOF_TIMESTAMPING_LAST }; @@ -46,6 +47,18 @@ enum { SOF_TIMESTAMPING_TX_SCHED | \ SOF_TIMESTAMPING_TX_ACK) +/** + * struct so_timestamping - SO_TIMESTAMPING parameter + * + * @flags: SO_TIMESTAMPING flags + * @bind_phc: Index of PTP virtual clock bound to sock. This is available + * if flag SOF_TIMESTAMPING_BIND_PHC is set. + */ +struct so_timestamping { + int flags; + int bind_phc; +}; + /** * struct hwtstamp_config - %SIOCGHWTSTAMP and %SIOCSHWTSTAMP parameter * diff --git a/net/core/sock.c b/net/core/sock.c index dd9599656c40d7..cad10711220431 100644 --- a/net/core/sock.c +++ b/net/core/sock.c @@ -139,6 +139,8 @@ #include #include +#include + static DEFINE_MUTEX(proto_list_mutex); static LIST_HEAD(proto_list); @@ -810,8 +812,47 @@ void sock_set_timestamp(struct sock *sk, int optname, bool valbool) } } -int sock_set_timestamping(struct sock *sk, int optname, int val) +static int sock_timestamping_bind_phc(struct sock *sk, int phc_index) { + struct net *net = sock_net(sk); + struct net_device *dev = NULL; + bool match = false; + int *vclock_index; + int i, num; + + if (sk->sk_bound_dev_if) + dev = dev_get_by_index(net, sk->sk_bound_dev_if); + + if (!dev) { + pr_err("%s: sock not bind to device\n", __func__); + return -EOPNOTSUPP; + } + + num = ethtool_get_phc_vclocks(dev, &vclock_index); + for (i = 0; i < num; i++) { + if (*(vclock_index + i) == phc_index) { + match = true; + break; + } + } + + if (num > 0) + kfree(vclock_index); + + if (!match) + return -EINVAL; + + sk->sk_bind_phc = phc_index; + + return 0; +} + +int sock_set_timestamping(struct sock *sk, int optname, + struct so_timestamping timestamping) +{ + int val = timestamping.flags; + int ret; + if (val & ~SOF_TIMESTAMPING_MASK) return -EINVAL; @@ -832,6 +873,12 @@ int sock_set_timestamping(struct sock *sk, int optname, int val) !(val & SOF_TIMESTAMPING_OPT_TSONLY)) return -EINVAL; + if (val & SOF_TIMESTAMPING_BIND_PHC) { + ret = sock_timestamping_bind_phc(sk, timestamping.bind_phc); + if (ret) + return ret; + } + sk->sk_tsflags = val; sock_valbool_flag(sk, SOCK_TSTAMP_NEW, optname == SO_TIMESTAMPING_NEW); @@ -907,6 +954,7 @@ EXPORT_SYMBOL(sock_set_mark); int sock_setsockopt(struct socket *sock, int level, int optname, sockptr_t optval, unsigned int optlen) { + struct so_timestamping timestamping; struct sock_txtime sk_txtime; struct sock *sk = sock->sk; int val; @@ -1073,7 +1121,15 @@ int sock_setsockopt(struct socket *sock, int level, int optname, case SO_TIMESTAMPING_NEW: case SO_TIMESTAMPING_OLD: - ret = sock_set_timestamping(sk, optname, val); + if (optlen == sizeof(timestamping)) { + if (copy_from_sockptr(×tamping, optval, + sizeof(timestamping))) + return -EFAULT; + } else { + memset(×tamping, 0, sizeof(timestamping)); + timestamping.flags = val; + } + ret = sock_set_timestamping(sk, optname, timestamping); break; case SO_RCVLOWAT: @@ -1348,6 +1404,7 @@ int sock_getsockopt(struct socket *sock, int level, int optname, struct __kernel_old_timeval tm; struct __kernel_sock_timeval stm; struct sock_txtime txtime; + struct so_timestamping timestamping; } v; int lv = sizeof(int); @@ -1451,7 +1508,9 @@ int sock_getsockopt(struct socket *sock, int level, int optname, break; case SO_TIMESTAMPING_OLD: - v.val = sk->sk_tsflags; + lv = sizeof(v.timestamping); + v.timestamping.flags = sk->sk_tsflags; + v.timestamping.bind_phc = sk->sk_bind_phc; break; case SO_RCVTIMEO_OLD: diff --git a/net/ethtool/common.c b/net/ethtool/common.c index 798231b0767686..c63e0739dc6acd 100644 --- a/net/ethtool/common.c +++ b/net/ethtool/common.c @@ -398,6 +398,7 @@ const char sof_timestamping_names[][ETH_GSTRING_LEN] = { [const_ilog2(SOF_TIMESTAMPING_OPT_STATS)] = "option-stats", [const_ilog2(SOF_TIMESTAMPING_OPT_PKTINFO)] = "option-pktinfo", [const_ilog2(SOF_TIMESTAMPING_OPT_TX_SWHW)] = "option-tx-swhw", + [const_ilog2(SOF_TIMESTAMPING_BIND_PHC)] = "bind-phc", }; static_assert(ARRAY_SIZE(sof_timestamping_names) == __SOF_TIMESTAMPING_CNT); diff --git a/net/mptcp/sockopt.c b/net/mptcp/sockopt.c index ea38cbcd2ad4cb..8c03afac5ca034 100644 --- a/net/mptcp/sockopt.c +++ b/net/mptcp/sockopt.c @@ -207,14 +207,25 @@ static int mptcp_setsockopt_sol_socket_timestamping(struct mptcp_sock *msk, { struct mptcp_subflow_context *subflow; struct sock *sk = (struct sock *)msk; - int val, ret; + struct so_timestamping timestamping; + int ret; - ret = mptcp_get_int_option(msk, optval, optlen, &val); - if (ret) - return ret; + if (optlen == sizeof(timestamping)) { + if (copy_from_sockptr(×tamping, optval, + sizeof(timestamping))) + return -EFAULT; + } else if (optlen == sizeof(int)) { + memset(×tamping, 0, sizeof(timestamping)); + + if (copy_from_sockptr(×tamping.flags, optval, sizeof(int))) + return -EFAULT; + } else { + return -EINVAL; + } ret = sock_setsockopt(sk->sk_socket, SOL_SOCKET, optname, - KERNEL_SOCKPTR(&val), sizeof(val)); + KERNEL_SOCKPTR(×tamping), + sizeof(timestamping)); if (ret) return ret; @@ -224,7 +235,7 @@ static int mptcp_setsockopt_sol_socket_timestamping(struct mptcp_sock *msk, struct sock *ssk = mptcp_subflow_tcp_sock(subflow); bool slow = lock_sock_fast(ssk); - sock_set_timestamping(sk, optname, val); + sock_set_timestamping(sk, optname, timestamping); unlock_sock_fast(ssk, slow); } From d7c08826558811e1bf88de3750a7051f27d0e25c Mon Sep 17 00:00:00 2001 From: Yangbo Lu Date: Wed, 30 Jun 2021 16:12:00 +0800 Subject: [PATCH 072/714] net: socket: support hardware timestamp conversion to PHC bound This patch is to support hardware timestamp conversion to PHC bound. This applies to both RX and TX since their skb handling (for TX, it's skb clone in error queue) all goes through __sock_recv_timestamp. Signed-off-by: Yangbo Lu Signed-off-by: David S. Miller --- net/socket.c | 19 +++++++++++++------ 1 file changed, 13 insertions(+), 6 deletions(-) diff --git a/net/socket.c b/net/socket.c index bd9233da24979f..0b2dad3bdf7fe9 100644 --- a/net/socket.c +++ b/net/socket.c @@ -104,6 +104,7 @@ #include #include #include +#include #ifdef CONFIG_NET_RX_BUSY_POLL unsigned int sysctl_net_busy_read __read_mostly; @@ -873,12 +874,18 @@ void __sock_recv_timestamp(struct msghdr *msg, struct sock *sk, empty = 0; if (shhwtstamps && (sk->sk_tsflags & SOF_TIMESTAMPING_RAW_HARDWARE) && - !skb_is_swtx_tstamp(skb, false_tstamp) && - ktime_to_timespec64_cond(shhwtstamps->hwtstamp, tss.ts + 2)) { - empty = 0; - if ((sk->sk_tsflags & SOF_TIMESTAMPING_OPT_PKTINFO) && - !skb_is_err_queue(skb)) - put_ts_pktinfo(msg, skb); + !skb_is_swtx_tstamp(skb, false_tstamp)) { + if (sk->sk_tsflags & SOF_TIMESTAMPING_BIND_PHC) + ptp_convert_timestamp(shhwtstamps, sk->sk_bind_phc); + + if (ktime_to_timespec64_cond(shhwtstamps->hwtstamp, + tss.ts + 2)) { + empty = 0; + + if ((sk->sk_tsflags & SOF_TIMESTAMPING_OPT_PKTINFO) && + !skb_is_err_queue(skb)) + put_ts_pktinfo(msg, skb); + } } if (!empty) { if (sock_flag(sk, SOCK_TSTAMP_NEW)) From 2214d7032479e50ff1fa7276e6efe1df08173fbf Mon Sep 17 00:00:00 2001 From: Yangbo Lu Date: Wed, 30 Jun 2021 16:12:01 +0800 Subject: [PATCH 073/714] selftests/net: timestamping: support binding PHC Support binding PHC of PTP vclock for timestamping. Signed-off-by: Yangbo Lu Signed-off-by: David S. Miller --- tools/testing/selftests/net/timestamping.c | 55 ++++++++++++++-------- 1 file changed, 35 insertions(+), 20 deletions(-) diff --git a/tools/testing/selftests/net/timestamping.c b/tools/testing/selftests/net/timestamping.c index 21091be706889e..aee631c5284eb5 100644 --- a/tools/testing/selftests/net/timestamping.c +++ b/tools/testing/selftests/net/timestamping.c @@ -47,7 +47,7 @@ static void usage(const char *error) { if (error) printf("invalid option: %s\n", error); - printf("timestamping interface option*\n\n" + printf("timestamping [bind_phc_index] [option]*\n\n" "Options:\n" " IP_MULTICAST_LOOP - looping outgoing multicasts\n" " SO_TIMESTAMP - normal software time stamping, ms resolution\n" @@ -58,6 +58,7 @@ static void usage(const char *error) " SOF_TIMESTAMPING_RX_SOFTWARE - software fallback for incoming packets\n" " SOF_TIMESTAMPING_SOFTWARE - request reporting of software time stamps\n" " SOF_TIMESTAMPING_RAW_HARDWARE - request reporting of raw HW time stamps\n" + " SOF_TIMESTAMPING_BIND_PHC - request to bind a PHC of PTP vclock\n" " SIOCGSTAMP - check last socket time stamp\n" " SIOCGSTAMPNS - more accurate socket time stamp\n" " PTPV2 - use PTPv2 messages\n"); @@ -311,7 +312,6 @@ static void recvpacket(int sock, int recvmsg_flags, int main(int argc, char **argv) { - int so_timestamping_flags = 0; int so_timestamp = 0; int so_timestampns = 0; int siocgstamp = 0; @@ -325,6 +325,8 @@ int main(int argc, char **argv) struct ifreq device; struct ifreq hwtstamp; struct hwtstamp_config hwconfig, hwconfig_requested; + struct so_timestamping so_timestamping_get = { 0, -1 }; + struct so_timestamping so_timestamping = { 0, -1 }; struct sockaddr_in addr; struct ip_mreq imr; struct in_addr iaddr; @@ -342,7 +344,12 @@ int main(int argc, char **argv) exit(1); } - for (i = 2; i < argc; i++) { + if (argc >= 3 && sscanf(argv[2], "%d", &so_timestamping.bind_phc) == 1) + val = 3; + else + val = 2; + + for (i = val; i < argc; i++) { if (!strcasecmp(argv[i], "SO_TIMESTAMP")) so_timestamp = 1; else if (!strcasecmp(argv[i], "SO_TIMESTAMPNS")) @@ -356,17 +363,19 @@ int main(int argc, char **argv) else if (!strcasecmp(argv[i], "PTPV2")) ptpv2 = 1; else if (!strcasecmp(argv[i], "SOF_TIMESTAMPING_TX_HARDWARE")) - so_timestamping_flags |= SOF_TIMESTAMPING_TX_HARDWARE; + so_timestamping.flags |= SOF_TIMESTAMPING_TX_HARDWARE; else if (!strcasecmp(argv[i], "SOF_TIMESTAMPING_TX_SOFTWARE")) - so_timestamping_flags |= SOF_TIMESTAMPING_TX_SOFTWARE; + so_timestamping.flags |= SOF_TIMESTAMPING_TX_SOFTWARE; else if (!strcasecmp(argv[i], "SOF_TIMESTAMPING_RX_HARDWARE")) - so_timestamping_flags |= SOF_TIMESTAMPING_RX_HARDWARE; + so_timestamping.flags |= SOF_TIMESTAMPING_RX_HARDWARE; else if (!strcasecmp(argv[i], "SOF_TIMESTAMPING_RX_SOFTWARE")) - so_timestamping_flags |= SOF_TIMESTAMPING_RX_SOFTWARE; + so_timestamping.flags |= SOF_TIMESTAMPING_RX_SOFTWARE; else if (!strcasecmp(argv[i], "SOF_TIMESTAMPING_SOFTWARE")) - so_timestamping_flags |= SOF_TIMESTAMPING_SOFTWARE; + so_timestamping.flags |= SOF_TIMESTAMPING_SOFTWARE; else if (!strcasecmp(argv[i], "SOF_TIMESTAMPING_RAW_HARDWARE")) - so_timestamping_flags |= SOF_TIMESTAMPING_RAW_HARDWARE; + so_timestamping.flags |= SOF_TIMESTAMPING_RAW_HARDWARE; + else if (!strcasecmp(argv[i], "SOF_TIMESTAMPING_BIND_PHC")) + so_timestamping.flags |= SOF_TIMESTAMPING_BIND_PHC; else usage(argv[i]); } @@ -385,10 +394,10 @@ int main(int argc, char **argv) hwtstamp.ifr_data = (void *)&hwconfig; memset(&hwconfig, 0, sizeof(hwconfig)); hwconfig.tx_type = - (so_timestamping_flags & SOF_TIMESTAMPING_TX_HARDWARE) ? + (so_timestamping.flags & SOF_TIMESTAMPING_TX_HARDWARE) ? HWTSTAMP_TX_ON : HWTSTAMP_TX_OFF; hwconfig.rx_filter = - (so_timestamping_flags & SOF_TIMESTAMPING_RX_HARDWARE) ? + (so_timestamping.flags & SOF_TIMESTAMPING_RX_HARDWARE) ? ptpv2 ? HWTSTAMP_FILTER_PTP_V2_L4_SYNC : HWTSTAMP_FILTER_PTP_V1_L4_SYNC : HWTSTAMP_FILTER_NONE; hwconfig_requested = hwconfig; @@ -413,6 +422,9 @@ int main(int argc, char **argv) sizeof(struct sockaddr_in)) < 0) bail("bind"); + if (setsockopt(sock, SOL_SOCKET, SO_BINDTODEVICE, interface, if_len)) + bail("bind device"); + /* set multicast group for outgoing packets */ inet_aton("224.0.1.130", &iaddr); /* alternate PTP domain 1 */ addr.sin_addr = iaddr; @@ -444,10 +456,9 @@ int main(int argc, char **argv) &enabled, sizeof(enabled)) < 0) bail("setsockopt SO_TIMESTAMPNS"); - if (so_timestamping_flags && - setsockopt(sock, SOL_SOCKET, SO_TIMESTAMPING, - &so_timestamping_flags, - sizeof(so_timestamping_flags)) < 0) + if (so_timestamping.flags && + setsockopt(sock, SOL_SOCKET, SO_TIMESTAMPING, &so_timestamping, + sizeof(so_timestamping)) < 0) bail("setsockopt SO_TIMESTAMPING"); /* request IP_PKTINFO for debugging purposes */ @@ -468,14 +479,18 @@ int main(int argc, char **argv) else printf("SO_TIMESTAMPNS %d\n", val); - if (getsockopt(sock, SOL_SOCKET, SO_TIMESTAMPING, &val, &len) < 0) { + len = sizeof(so_timestamping_get); + if (getsockopt(sock, SOL_SOCKET, SO_TIMESTAMPING, &so_timestamping_get, + &len) < 0) { printf("%s: %s\n", "getsockopt SO_TIMESTAMPING", strerror(errno)); } else { - printf("SO_TIMESTAMPING %d\n", val); - if (val != so_timestamping_flags) - printf(" not the expected value %d\n", - so_timestamping_flags); + printf("SO_TIMESTAMPING flags %d, bind phc %d\n", + so_timestamping_get.flags, so_timestamping_get.bind_phc); + if (so_timestamping_get.flags != so_timestamping.flags || + so_timestamping_get.bind_phc != so_timestamping.bind_phc) + printf(" not expected, flags %d, bind phc %d\n", + so_timestamping.flags, so_timestamping.bind_phc); } /* send packets forever every five seconds */ From 5ce15f2783332dce99ef04e2bdc927f0fa84105d Mon Sep 17 00:00:00 2001 From: Yangbo Lu Date: Wed, 30 Jun 2021 16:12:02 +0800 Subject: [PATCH 074/714] MAINTAINERS: add entry for PTP virtual clock driver Add entry for PTP virtual clock driver. Signed-off-by: Yangbo Lu Signed-off-by: David S. Miller --- MAINTAINERS | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/MAINTAINERS b/MAINTAINERS index 88449b7a4c95a3..ca7287ee775bef 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -14890,6 +14890,13 @@ F: drivers/net/phy/dp83640* F: drivers/ptp/* F: include/linux/ptp_cl* +PTP VIRTUAL CLOCK SUPPORT +M: Yangbo Lu +L: netdev@vger.kernel.org +S: Maintained +F: drivers/ptp/ptp_vclock.c +F: net/ethtool/phc_vclocks.c + PTRACE SUPPORT M: Oleg Nesterov S: Maintained From 4f408e1fa6e10b6da72691233369172bac7d9e9b Mon Sep 17 00:00:00 2001 From: Sukadev Bhattiprolu Date: Wed, 30 Jun 2021 14:36:17 -0400 Subject: [PATCH 075/714] ibmvnic: retry reset if there are no other resets Normally, if a reset fails due to failover or other communication error there is another reset (eg: FAILOVER) in the queue and we would process that reset. But if we are unable to communicate with PHYP or VIOS after H_FREE_CRQ, there would be no other resets in the queue and the adapter would be in an undefined state even though it was in the OPEN state earlier. While starting the reset we set the carrier to off state so we won't even get the timeout resets. If the last queued reset fails, retry it as a hard reset (after the usual 60 second settling time). Signed-off-by: Sukadev Bhattiprolu Reviewed-by: Dany Madden Signed-off-by: David S. Miller --- drivers/net/ethernet/ibm/ibmvnic.c | 22 +++++++++++++++++++--- 1 file changed, 19 insertions(+), 3 deletions(-) diff --git a/drivers/net/ethernet/ibm/ibmvnic.c b/drivers/net/ethernet/ibm/ibmvnic.c index 374a75d4faeae2..ed77191d19f44c 100644 --- a/drivers/net/ethernet/ibm/ibmvnic.c +++ b/drivers/net/ethernet/ibm/ibmvnic.c @@ -2420,9 +2420,10 @@ static int do_passive_init(struct ibmvnic_adapter *adapter) static void __ibmvnic_reset(struct work_struct *work) { - struct ibmvnic_rwi *rwi; struct ibmvnic_adapter *adapter; bool saved_state = false; + struct ibmvnic_rwi *tmprwi; + struct ibmvnic_rwi *rwi; unsigned long flags; u32 reset_state; int rc = 0; @@ -2489,7 +2490,7 @@ static void __ibmvnic_reset(struct work_struct *work) } else { rc = do_reset(adapter, rwi, reset_state); } - kfree(rwi); + tmprwi = rwi; adapter->last_reset_time = jiffies; if (rc) @@ -2497,8 +2498,23 @@ static void __ibmvnic_reset(struct work_struct *work) rwi = get_next_rwi(adapter); + /* + * If there is another reset queued, free the previous rwi + * and process the new reset even if previous reset failed + * (the previous reset could have failed because of a fail + * over for instance, so process the fail over). + * + * If there are no resets queued and the previous reset failed, + * the adapter would be in an undefined state. So retry the + * previous reset as a hard reset. + */ + if (rwi) + kfree(tmprwi); + else if (rc) + rwi = tmprwi; + if (rwi && (rwi->reset_reason == VNIC_RESET_FAILOVER || - rwi->reset_reason == VNIC_RESET_MOBILITY)) + rwi->reset_reason == VNIC_RESET_MOBILITY || rc)) adapter->force_reset_recovery = true; } From 09ef17863f37235fe4e65a7d991e487b9ff6e553 Mon Sep 17 00:00:00 2001 From: Xin Long Date: Wed, 30 Jun 2021 20:18:20 -0400 Subject: [PATCH 076/714] Documentation: add more details in tipc.rst kernel-doc for TIPC is too simple, we need to add more information for it. This patch is to extend the abstract, and add the Features and Links items. Signed-off-by: Xin Long Acked-by: Jon Maloy Signed-off-by: David S. Miller --- Documentation/networking/tipc.rst | 121 +++++++++++++++++++++++++++++- 1 file changed, 118 insertions(+), 3 deletions(-) diff --git a/Documentation/networking/tipc.rst b/Documentation/networking/tipc.rst index 76775f24cdc84d..ab63d298cca21d 100644 --- a/Documentation/networking/tipc.rst +++ b/Documentation/networking/tipc.rst @@ -4,10 +4,125 @@ Linux Kernel TIPC ================= -TIPC (Transparent Inter Process Communication) is a protocol that is -specially designed for intra-cluster communication. +Introduction +============ -For more information about TIPC, see http://tipc.sourceforge.net. +TIPC (Transparent Inter Process Communication) is a protocol that is specially +designed for intra-cluster communication. It can be configured to transmit +messages either on UDP or directly across Ethernet. Message delivery is +sequence guaranteed, loss free and flow controlled. Latency times are shorter +than with any other known protocol, while maximal throughput is comparable to +that of TCP. + +TIPC Features +------------- + +- Cluster wide IPC service + + Have you ever wished you had the convenience of Unix Domain Sockets even when + transmitting data between cluster nodes? Where you yourself determine the + addresses you want to bind to and use? Where you don't have to perform DNS + lookups and worry about IP addresses? Where you don't have to start timers + to monitor the continuous existence of peer sockets? And yet without the + downsides of that socket type, such as the risk of lingering inodes? + + Welcome to the Transparent Inter Process Communication service, TIPC in short, + which gives you all of this, and a lot more. + +- Service Addressing + + A fundamental concept in TIPC is that of Service Addressing which makes it + possible for a programmer to chose his own address, bind it to a server + socket and let client programs use only that address for sending messages. + +- Service Tracking + + A client wanting to wait for the availability of a server, uses the Service + Tracking mechanism to subscribe for binding and unbinding/close events for + sockets with the associated service address. + + The service tracking mechanism can also be used for Cluster Topology Tracking, + i.e., subscribing for availability/non-availability of cluster nodes. + + Likewise, the service tracking mechanism can be used for Cluster Connectivity + Tracking, i.e., subscribing for up/down events for individual links between + cluster nodes. + +- Transmission Modes + + Using a service address, a client can send datagram messages to a server socket. + + Using the same address type, it can establish a connection towards an accepting + server socket. + + It can also use a service address to create and join a Communication Group, + which is the TIPC manifestation of a brokerless message bus. + + Multicast with very good performance and scalability is available both in + datagram mode and in communication group mode. + +- Inter Node Links + + Communication between any two nodes in a cluster is maintained by one or two + Inter Node Links, which both guarantee data traffic integrity and monitor + the peer node's availability. + +- Cluster Scalability + + By applying the Overlapping Ring Monitoring algorithm on the inter node links + it is possible to scale TIPC clusters up to 1000 nodes with a maintained + neighbor failure discovery time of 1-2 seconds. For smaller clusters this + time can be made much shorter. + +- Neighbor Discovery + + Neighbor Node Discovery in the cluster is done by Ethernet broadcast or UDP + multicast, when any of those services are available. If not, configured peer + IP addresses can be used. + +- Configuration + + When running TIPC in single node mode no configuration whatsoever is needed. + When running in cluster mode TIPC must as a minimum be given a node address + (before Linux 4.17) and told which interface to attach to. The "tipc" + configuration tool makes is possible to add and maintain many more + configuration parameters. + +- Performance + + TIPC message transfer latency times are better than in any other known protocol. + Maximal byte throughput for inter-node connections is still somewhat lower than + for TCP, while they are superior for intra-node and inter-container throughput + on the same host. + +- Language Support + + The TIPC user API has support for C, Python, Perl, Ruby, D and Go. + +More Information +---------------- + +- How to set up TIPC: + + http://tipc.io/getting_started.html + +- How to program with TIPC: + + http://tipc.io/programming.html + +- How to contribute to TIPC: + +- http://tipc.io/contacts.html + +- More details about TIPC specification: + + http://tipc.io/protocol.html + + +Implementation +============== + +TIPC is implemented as a kernel module in net/tipc/ directory. TIPC Base Types --------------- From ca75bcf0a83b6cc7f53a593d98ec7121c4839b43 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Thu, 1 Jul 2021 10:15:09 +0200 Subject: [PATCH 077/714] net: remove the caif_hsi driver The caif_hsi driver relies on a cfhsi_get_ops symbol using symbol_get, but this symbol is not provided anywhere in the kernel tree. Remove this driver given that it is dead code. Signed-off-by: Christoph Hellwig Reviewed-by: Greg Kroah-Hartman Signed-off-by: David S. Miller --- drivers/net/caif/Kconfig | 9 - drivers/net/caif/Makefile | 3 - drivers/net/caif/caif_hsi.c | 1454 ----------------------------------- include/net/caif/caif_hsi.h | 200 ----- 4 files changed, 1666 deletions(-) delete mode 100644 drivers/net/caif/caif_hsi.c delete mode 100644 include/net/caif/caif_hsi.h diff --git a/drivers/net/caif/Kconfig b/drivers/net/caif/Kconfig index a77124bc1f4b0b..709660cb38f872 100644 --- a/drivers/net/caif/Kconfig +++ b/drivers/net/caif/Kconfig @@ -20,15 +20,6 @@ config CAIF_TTY identified as N_CAIF. When this ldisc is opened from user space it will redirect the TTY's traffic into the CAIF stack. -config CAIF_HSI - tristate "CAIF HSI transport driver" - depends on CAIF - default n - help - The CAIF low level driver for CAIF over HSI. - Be aware that if you enable this then you also need to - enable a low-level HSI driver. - config CAIF_VIRTIO tristate "CAIF virtio transport driver" depends on CAIF && HAS_DMA diff --git a/drivers/net/caif/Makefile b/drivers/net/caif/Makefile index b1918c8c126c84..97f664f8016c6b 100644 --- a/drivers/net/caif/Makefile +++ b/drivers/net/caif/Makefile @@ -4,8 +4,5 @@ ccflags-$(CONFIG_CAIF_DEBUG) := -DDEBUG # Serial interface obj-$(CONFIG_CAIF_TTY) += caif_serial.o -# HSI interface -obj-$(CONFIG_CAIF_HSI) += caif_hsi.o - # Virtio interface obj-$(CONFIG_CAIF_VIRTIO) += caif_virtio.o diff --git a/drivers/net/caif/caif_hsi.c b/drivers/net/caif/caif_hsi.c deleted file mode 100644 index 3d63b15bbaa10c..00000000000000 --- a/drivers/net/caif/caif_hsi.c +++ /dev/null @@ -1,1454 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0-only -/* - * Copyright (C) ST-Ericsson AB 2010 - * Author: Daniel Martensson - * Dmitry.Tarnyagin / dmitry.tarnyagin@lockless.no - */ - -#define pr_fmt(fmt) KBUILD_MODNAME fmt - -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include - -MODULE_LICENSE("GPL"); -MODULE_AUTHOR("Daniel Martensson"); -MODULE_DESCRIPTION("CAIF HSI driver"); - -/* Returns the number of padding bytes for alignment. */ -#define PAD_POW2(x, pow) ((((x)&((pow)-1)) == 0) ? 0 :\ - (((pow)-((x)&((pow)-1))))) - -static const struct cfhsi_config hsi_default_config = { - - /* Inactivity timeout on HSI, ms */ - .inactivity_timeout = HZ, - - /* Aggregation timeout (ms) of zero means no aggregation is done*/ - .aggregation_timeout = 1, - - /* - * HSI link layer flow-control thresholds. - * Threshold values for the HSI packet queue. Flow-control will be - * asserted when the number of packets exceeds q_high_mark. It will - * not be de-asserted before the number of packets drops below - * q_low_mark. - * Warning: A high threshold value might increase throughput but it - * will at the same time prevent channel prioritization and increase - * the risk of flooding the modem. The high threshold should be above - * the low. - */ - .q_high_mark = 100, - .q_low_mark = 50, - - /* - * HSI padding options. - * Warning: must be a base of 2 (& operation used) and can not be zero ! - */ - .head_align = 4, - .tail_align = 4, -}; - -#define ON 1 -#define OFF 0 - -static LIST_HEAD(cfhsi_list); - -static void cfhsi_inactivity_tout(struct timer_list *t) -{ - struct cfhsi *cfhsi = from_timer(cfhsi, t, inactivity_timer); - - netdev_dbg(cfhsi->ndev, "%s.\n", - __func__); - - /* Schedule power down work queue. */ - if (!test_bit(CFHSI_SHUTDOWN, &cfhsi->bits)) - queue_work(cfhsi->wq, &cfhsi->wake_down_work); -} - -static void cfhsi_update_aggregation_stats(struct cfhsi *cfhsi, - const struct sk_buff *skb, - int direction) -{ - struct caif_payload_info *info; - int hpad, tpad, len; - - info = (struct caif_payload_info *)&skb->cb; - hpad = 1 + PAD_POW2((info->hdr_len + 1), cfhsi->cfg.head_align); - tpad = PAD_POW2((skb->len + hpad), cfhsi->cfg.tail_align); - len = skb->len + hpad + tpad; - - if (direction > 0) - cfhsi->aggregation_len += len; - else if (direction < 0) - cfhsi->aggregation_len -= len; -} - -static bool cfhsi_can_send_aggregate(struct cfhsi *cfhsi) -{ - int i; - - if (cfhsi->cfg.aggregation_timeout == 0) - return true; - - for (i = 0; i < CFHSI_PRIO_BEBK; ++i) { - if (cfhsi->qhead[i].qlen) - return true; - } - - /* TODO: Use aggregation_len instead */ - if (cfhsi->qhead[CFHSI_PRIO_BEBK].qlen >= CFHSI_MAX_PKTS) - return true; - - return false; -} - -static struct sk_buff *cfhsi_dequeue(struct cfhsi *cfhsi) -{ - struct sk_buff *skb; - int i; - - for (i = 0; i < CFHSI_PRIO_LAST; ++i) { - skb = skb_dequeue(&cfhsi->qhead[i]); - if (skb) - break; - } - - return skb; -} - -static int cfhsi_tx_queue_len(struct cfhsi *cfhsi) -{ - int i, len = 0; - for (i = 0; i < CFHSI_PRIO_LAST; ++i) - len += skb_queue_len(&cfhsi->qhead[i]); - return len; -} - -static void cfhsi_abort_tx(struct cfhsi *cfhsi) -{ - struct sk_buff *skb; - - for (;;) { - spin_lock_bh(&cfhsi->lock); - skb = cfhsi_dequeue(cfhsi); - if (!skb) - break; - - cfhsi->ndev->stats.tx_errors++; - cfhsi->ndev->stats.tx_dropped++; - cfhsi_update_aggregation_stats(cfhsi, skb, -1); - spin_unlock_bh(&cfhsi->lock); - kfree_skb(skb); - } - cfhsi->tx_state = CFHSI_TX_STATE_IDLE; - if (!test_bit(CFHSI_SHUTDOWN, &cfhsi->bits)) - mod_timer(&cfhsi->inactivity_timer, - jiffies + cfhsi->cfg.inactivity_timeout); - spin_unlock_bh(&cfhsi->lock); -} - -static int cfhsi_flush_fifo(struct cfhsi *cfhsi) -{ - char buffer[32]; /* Any reasonable value */ - size_t fifo_occupancy; - int ret; - - netdev_dbg(cfhsi->ndev, "%s.\n", - __func__); - - do { - ret = cfhsi->ops->cfhsi_fifo_occupancy(cfhsi->ops, - &fifo_occupancy); - if (ret) { - netdev_warn(cfhsi->ndev, - "%s: can't get FIFO occupancy: %d.\n", - __func__, ret); - break; - } else if (!fifo_occupancy) - /* No more data, exitting normally */ - break; - - fifo_occupancy = min(sizeof(buffer), fifo_occupancy); - set_bit(CFHSI_FLUSH_FIFO, &cfhsi->bits); - ret = cfhsi->ops->cfhsi_rx(buffer, fifo_occupancy, - cfhsi->ops); - if (ret) { - clear_bit(CFHSI_FLUSH_FIFO, &cfhsi->bits); - netdev_warn(cfhsi->ndev, - "%s: can't read data: %d.\n", - __func__, ret); - break; - } - - ret = 5 * HZ; - ret = wait_event_interruptible_timeout(cfhsi->flush_fifo_wait, - !test_bit(CFHSI_FLUSH_FIFO, &cfhsi->bits), ret); - - if (ret < 0) { - netdev_warn(cfhsi->ndev, - "%s: can't wait for flush complete: %d.\n", - __func__, ret); - break; - } else if (!ret) { - ret = -ETIMEDOUT; - netdev_warn(cfhsi->ndev, - "%s: timeout waiting for flush complete.\n", - __func__); - break; - } - } while (1); - - return ret; -} - -static int cfhsi_tx_frm(struct cfhsi_desc *desc, struct cfhsi *cfhsi) -{ - int nfrms = 0; - int pld_len = 0; - struct sk_buff *skb; - u8 *pfrm = desc->emb_frm + CFHSI_MAX_EMB_FRM_SZ; - - skb = cfhsi_dequeue(cfhsi); - if (!skb) - return 0; - - /* Clear offset. */ - desc->offset = 0; - - /* Check if we can embed a CAIF frame. */ - if (skb->len < CFHSI_MAX_EMB_FRM_SZ) { - struct caif_payload_info *info; - int hpad; - int tpad; - - /* Calculate needed head alignment and tail alignment. */ - info = (struct caif_payload_info *)&skb->cb; - - hpad = 1 + PAD_POW2((info->hdr_len + 1), cfhsi->cfg.head_align); - tpad = PAD_POW2((skb->len + hpad), cfhsi->cfg.tail_align); - - /* Check if frame still fits with added alignment. */ - if ((skb->len + hpad + tpad) <= CFHSI_MAX_EMB_FRM_SZ) { - u8 *pemb = desc->emb_frm; - desc->offset = CFHSI_DESC_SHORT_SZ; - *pemb = (u8)(hpad - 1); - pemb += hpad; - - /* Update network statistics. */ - spin_lock_bh(&cfhsi->lock); - cfhsi->ndev->stats.tx_packets++; - cfhsi->ndev->stats.tx_bytes += skb->len; - cfhsi_update_aggregation_stats(cfhsi, skb, -1); - spin_unlock_bh(&cfhsi->lock); - - /* Copy in embedded CAIF frame. */ - skb_copy_bits(skb, 0, pemb, skb->len); - - /* Consume the SKB */ - consume_skb(skb); - skb = NULL; - } - } - - /* Create payload CAIF frames. */ - while (nfrms < CFHSI_MAX_PKTS) { - struct caif_payload_info *info; - int hpad; - int tpad; - - if (!skb) - skb = cfhsi_dequeue(cfhsi); - - if (!skb) - break; - - /* Calculate needed head alignment and tail alignment. */ - info = (struct caif_payload_info *)&skb->cb; - - hpad = 1 + PAD_POW2((info->hdr_len + 1), cfhsi->cfg.head_align); - tpad = PAD_POW2((skb->len + hpad), cfhsi->cfg.tail_align); - - /* Fill in CAIF frame length in descriptor. */ - desc->cffrm_len[nfrms] = hpad + skb->len + tpad; - - /* Fill head padding information. */ - *pfrm = (u8)(hpad - 1); - pfrm += hpad; - - /* Update network statistics. */ - spin_lock_bh(&cfhsi->lock); - cfhsi->ndev->stats.tx_packets++; - cfhsi->ndev->stats.tx_bytes += skb->len; - cfhsi_update_aggregation_stats(cfhsi, skb, -1); - spin_unlock_bh(&cfhsi->lock); - - /* Copy in CAIF frame. */ - skb_copy_bits(skb, 0, pfrm, skb->len); - - /* Update payload length. */ - pld_len += desc->cffrm_len[nfrms]; - - /* Update frame pointer. */ - pfrm += skb->len + tpad; - - /* Consume the SKB */ - consume_skb(skb); - skb = NULL; - - /* Update number of frames. */ - nfrms++; - } - - /* Unused length fields should be zero-filled (according to SPEC). */ - while (nfrms < CFHSI_MAX_PKTS) { - desc->cffrm_len[nfrms] = 0x0000; - nfrms++; - } - - /* Check if we can piggy-back another descriptor. */ - if (cfhsi_can_send_aggregate(cfhsi)) - desc->header |= CFHSI_PIGGY_DESC; - else - desc->header &= ~CFHSI_PIGGY_DESC; - - return CFHSI_DESC_SZ + pld_len; -} - -static void cfhsi_start_tx(struct cfhsi *cfhsi) -{ - struct cfhsi_desc *desc = (struct cfhsi_desc *)cfhsi->tx_buf; - int len, res; - - netdev_dbg(cfhsi->ndev, "%s.\n", __func__); - - if (test_bit(CFHSI_SHUTDOWN, &cfhsi->bits)) - return; - - do { - /* Create HSI frame. */ - len = cfhsi_tx_frm(desc, cfhsi); - if (!len) { - spin_lock_bh(&cfhsi->lock); - if (unlikely(cfhsi_tx_queue_len(cfhsi))) { - spin_unlock_bh(&cfhsi->lock); - res = -EAGAIN; - continue; - } - cfhsi->tx_state = CFHSI_TX_STATE_IDLE; - /* Start inactivity timer. */ - mod_timer(&cfhsi->inactivity_timer, - jiffies + cfhsi->cfg.inactivity_timeout); - spin_unlock_bh(&cfhsi->lock); - break; - } - - /* Set up new transfer. */ - res = cfhsi->ops->cfhsi_tx(cfhsi->tx_buf, len, cfhsi->ops); - if (WARN_ON(res < 0)) - netdev_err(cfhsi->ndev, "%s: TX error %d.\n", - __func__, res); - } while (res < 0); -} - -static void cfhsi_tx_done(struct cfhsi *cfhsi) -{ - netdev_dbg(cfhsi->ndev, "%s.\n", __func__); - - if (test_bit(CFHSI_SHUTDOWN, &cfhsi->bits)) - return; - - /* - * Send flow on if flow off has been previously signalled - * and number of packets is below low water mark. - */ - spin_lock_bh(&cfhsi->lock); - if (cfhsi->flow_off_sent && - cfhsi_tx_queue_len(cfhsi) <= cfhsi->cfg.q_low_mark && - cfhsi->cfdev.flowctrl) { - - cfhsi->flow_off_sent = 0; - cfhsi->cfdev.flowctrl(cfhsi->ndev, ON); - } - - if (cfhsi_can_send_aggregate(cfhsi)) { - spin_unlock_bh(&cfhsi->lock); - cfhsi_start_tx(cfhsi); - } else { - mod_timer(&cfhsi->aggregation_timer, - jiffies + cfhsi->cfg.aggregation_timeout); - spin_unlock_bh(&cfhsi->lock); - } - - return; -} - -static void cfhsi_tx_done_cb(struct cfhsi_cb_ops *cb_ops) -{ - struct cfhsi *cfhsi; - - cfhsi = container_of(cb_ops, struct cfhsi, cb_ops); - netdev_dbg(cfhsi->ndev, "%s.\n", - __func__); - - if (test_bit(CFHSI_SHUTDOWN, &cfhsi->bits)) - return; - cfhsi_tx_done(cfhsi); -} - -static int cfhsi_rx_desc(struct cfhsi_desc *desc, struct cfhsi *cfhsi) -{ - int xfer_sz = 0; - int nfrms = 0; - u16 *plen = NULL; - u8 *pfrm = NULL; - - if ((desc->header & ~CFHSI_PIGGY_DESC) || - (desc->offset > CFHSI_MAX_EMB_FRM_SZ)) { - netdev_err(cfhsi->ndev, "%s: Invalid descriptor.\n", - __func__); - return -EPROTO; - } - - /* Check for embedded CAIF frame. */ - if (desc->offset) { - struct sk_buff *skb; - int len = 0; - pfrm = ((u8 *)desc) + desc->offset; - - /* Remove offset padding. */ - pfrm += *pfrm + 1; - - /* Read length of CAIF frame (little endian). */ - len = *pfrm; - len |= ((*(pfrm+1)) << 8) & 0xFF00; - len += 2; /* Add FCS fields. */ - - /* Sanity check length of CAIF frame. */ - if (unlikely(len > CFHSI_MAX_CAIF_FRAME_SZ)) { - netdev_err(cfhsi->ndev, "%s: Invalid length.\n", - __func__); - return -EPROTO; - } - - /* Allocate SKB (OK even in IRQ context). */ - skb = alloc_skb(len + 1, GFP_ATOMIC); - if (!skb) { - netdev_err(cfhsi->ndev, "%s: Out of memory !\n", - __func__); - return -ENOMEM; - } - caif_assert(skb != NULL); - - skb_put_data(skb, pfrm, len); - - skb->protocol = htons(ETH_P_CAIF); - skb_reset_mac_header(skb); - skb->dev = cfhsi->ndev; - - netif_rx_any_context(skb); - - /* Update network statistics. */ - cfhsi->ndev->stats.rx_packets++; - cfhsi->ndev->stats.rx_bytes += len; - } - - /* Calculate transfer length. */ - plen = desc->cffrm_len; - while (nfrms < CFHSI_MAX_PKTS && *plen) { - xfer_sz += *plen; - plen++; - nfrms++; - } - - /* Check for piggy-backed descriptor. */ - if (desc->header & CFHSI_PIGGY_DESC) - xfer_sz += CFHSI_DESC_SZ; - - if ((xfer_sz % 4) || (xfer_sz > (CFHSI_BUF_SZ_RX - CFHSI_DESC_SZ))) { - netdev_err(cfhsi->ndev, - "%s: Invalid payload len: %d, ignored.\n", - __func__, xfer_sz); - return -EPROTO; - } - return xfer_sz; -} - -static int cfhsi_rx_desc_len(struct cfhsi_desc *desc) -{ - int xfer_sz = 0; - int nfrms = 0; - u16 *plen; - - if ((desc->header & ~CFHSI_PIGGY_DESC) || - (desc->offset > CFHSI_MAX_EMB_FRM_SZ)) { - - pr_err("Invalid descriptor. %x %x\n", desc->header, - desc->offset); - return -EPROTO; - } - - /* Calculate transfer length. */ - plen = desc->cffrm_len; - while (nfrms < CFHSI_MAX_PKTS && *plen) { - xfer_sz += *plen; - plen++; - nfrms++; - } - - if (xfer_sz % 4) { - pr_err("Invalid payload len: %d, ignored.\n", xfer_sz); - return -EPROTO; - } - return xfer_sz; -} - -static int cfhsi_rx_pld(struct cfhsi_desc *desc, struct cfhsi *cfhsi) -{ - int rx_sz = 0; - int nfrms = 0; - u16 *plen = NULL; - u8 *pfrm = NULL; - - /* Sanity check header and offset. */ - if (WARN_ON((desc->header & ~CFHSI_PIGGY_DESC) || - (desc->offset > CFHSI_MAX_EMB_FRM_SZ))) { - netdev_err(cfhsi->ndev, "%s: Invalid descriptor.\n", - __func__); - return -EPROTO; - } - - /* Set frame pointer to start of payload. */ - pfrm = desc->emb_frm + CFHSI_MAX_EMB_FRM_SZ; - plen = desc->cffrm_len; - - /* Skip already processed frames. */ - while (nfrms < cfhsi->rx_state.nfrms) { - pfrm += *plen; - rx_sz += *plen; - plen++; - nfrms++; - } - - /* Parse payload. */ - while (nfrms < CFHSI_MAX_PKTS && *plen) { - struct sk_buff *skb; - u8 *pcffrm = NULL; - int len; - - /* CAIF frame starts after head padding. */ - pcffrm = pfrm + *pfrm + 1; - - /* Read length of CAIF frame (little endian). */ - len = *pcffrm; - len |= ((*(pcffrm + 1)) << 8) & 0xFF00; - len += 2; /* Add FCS fields. */ - - /* Sanity check length of CAIF frames. */ - if (unlikely(len > CFHSI_MAX_CAIF_FRAME_SZ)) { - netdev_err(cfhsi->ndev, "%s: Invalid length.\n", - __func__); - return -EPROTO; - } - - /* Allocate SKB (OK even in IRQ context). */ - skb = alloc_skb(len + 1, GFP_ATOMIC); - if (!skb) { - netdev_err(cfhsi->ndev, "%s: Out of memory !\n", - __func__); - cfhsi->rx_state.nfrms = nfrms; - return -ENOMEM; - } - caif_assert(skb != NULL); - - skb_put_data(skb, pcffrm, len); - - skb->protocol = htons(ETH_P_CAIF); - skb_reset_mac_header(skb); - skb->dev = cfhsi->ndev; - - netif_rx_any_context(skb); - - /* Update network statistics. */ - cfhsi->ndev->stats.rx_packets++; - cfhsi->ndev->stats.rx_bytes += len; - - pfrm += *plen; - rx_sz += *plen; - plen++; - nfrms++; - } - - return rx_sz; -} - -static void cfhsi_rx_done(struct cfhsi *cfhsi) -{ - int res; - int desc_pld_len = 0, rx_len, rx_state; - struct cfhsi_desc *desc = NULL; - u8 *rx_ptr, *rx_buf; - struct cfhsi_desc *piggy_desc = NULL; - - desc = (struct cfhsi_desc *)cfhsi->rx_buf; - - netdev_dbg(cfhsi->ndev, "%s\n", __func__); - - if (test_bit(CFHSI_SHUTDOWN, &cfhsi->bits)) - return; - - /* Update inactivity timer if pending. */ - spin_lock_bh(&cfhsi->lock); - mod_timer_pending(&cfhsi->inactivity_timer, - jiffies + cfhsi->cfg.inactivity_timeout); - spin_unlock_bh(&cfhsi->lock); - - if (cfhsi->rx_state.state == CFHSI_RX_STATE_DESC) { - desc_pld_len = cfhsi_rx_desc_len(desc); - - if (desc_pld_len < 0) - goto out_of_sync; - - rx_buf = cfhsi->rx_buf; - rx_len = desc_pld_len; - if (desc_pld_len > 0 && (desc->header & CFHSI_PIGGY_DESC)) - rx_len += CFHSI_DESC_SZ; - if (desc_pld_len == 0) - rx_buf = cfhsi->rx_flip_buf; - } else { - rx_buf = cfhsi->rx_flip_buf; - - rx_len = CFHSI_DESC_SZ; - if (cfhsi->rx_state.pld_len > 0 && - (desc->header & CFHSI_PIGGY_DESC)) { - - piggy_desc = (struct cfhsi_desc *) - (desc->emb_frm + CFHSI_MAX_EMB_FRM_SZ + - cfhsi->rx_state.pld_len); - - cfhsi->rx_state.piggy_desc = true; - - /* Extract payload len from piggy-backed descriptor. */ - desc_pld_len = cfhsi_rx_desc_len(piggy_desc); - if (desc_pld_len < 0) - goto out_of_sync; - - if (desc_pld_len > 0) { - rx_len = desc_pld_len; - if (piggy_desc->header & CFHSI_PIGGY_DESC) - rx_len += CFHSI_DESC_SZ; - } - - /* - * Copy needed information from the piggy-backed - * descriptor to the descriptor in the start. - */ - memcpy(rx_buf, (u8 *)piggy_desc, - CFHSI_DESC_SHORT_SZ); - } - } - - if (desc_pld_len) { - rx_state = CFHSI_RX_STATE_PAYLOAD; - rx_ptr = rx_buf + CFHSI_DESC_SZ; - } else { - rx_state = CFHSI_RX_STATE_DESC; - rx_ptr = rx_buf; - rx_len = CFHSI_DESC_SZ; - } - - /* Initiate next read */ - if (test_bit(CFHSI_AWAKE, &cfhsi->bits)) { - /* Set up new transfer. */ - netdev_dbg(cfhsi->ndev, "%s: Start RX.\n", - __func__); - - res = cfhsi->ops->cfhsi_rx(rx_ptr, rx_len, - cfhsi->ops); - if (WARN_ON(res < 0)) { - netdev_err(cfhsi->ndev, "%s: RX error %d.\n", - __func__, res); - cfhsi->ndev->stats.rx_errors++; - cfhsi->ndev->stats.rx_dropped++; - } - } - - if (cfhsi->rx_state.state == CFHSI_RX_STATE_DESC) { - /* Extract payload from descriptor */ - if (cfhsi_rx_desc(desc, cfhsi) < 0) - goto out_of_sync; - } else { - /* Extract payload */ - if (cfhsi_rx_pld(desc, cfhsi) < 0) - goto out_of_sync; - if (piggy_desc) { - /* Extract any payload in piggyback descriptor. */ - if (cfhsi_rx_desc(piggy_desc, cfhsi) < 0) - goto out_of_sync; - /* Mark no embedded frame after extracting it */ - piggy_desc->offset = 0; - } - } - - /* Update state info */ - memset(&cfhsi->rx_state, 0, sizeof(cfhsi->rx_state)); - cfhsi->rx_state.state = rx_state; - cfhsi->rx_ptr = rx_ptr; - cfhsi->rx_len = rx_len; - cfhsi->rx_state.pld_len = desc_pld_len; - cfhsi->rx_state.piggy_desc = desc->header & CFHSI_PIGGY_DESC; - - if (rx_buf != cfhsi->rx_buf) - swap(cfhsi->rx_buf, cfhsi->rx_flip_buf); - return; - -out_of_sync: - netdev_err(cfhsi->ndev, "%s: Out of sync.\n", __func__); - print_hex_dump_bytes("--> ", DUMP_PREFIX_NONE, - cfhsi->rx_buf, CFHSI_DESC_SZ); - schedule_work(&cfhsi->out_of_sync_work); -} - -static void cfhsi_rx_slowpath(struct timer_list *t) -{ - struct cfhsi *cfhsi = from_timer(cfhsi, t, rx_slowpath_timer); - - netdev_dbg(cfhsi->ndev, "%s.\n", - __func__); - - cfhsi_rx_done(cfhsi); -} - -static void cfhsi_rx_done_cb(struct cfhsi_cb_ops *cb_ops) -{ - struct cfhsi *cfhsi; - - cfhsi = container_of(cb_ops, struct cfhsi, cb_ops); - netdev_dbg(cfhsi->ndev, "%s.\n", - __func__); - - if (test_bit(CFHSI_SHUTDOWN, &cfhsi->bits)) - return; - - if (test_and_clear_bit(CFHSI_FLUSH_FIFO, &cfhsi->bits)) - wake_up_interruptible(&cfhsi->flush_fifo_wait); - else - cfhsi_rx_done(cfhsi); -} - -static void cfhsi_wake_up(struct work_struct *work) -{ - struct cfhsi *cfhsi = NULL; - int res; - int len; - long ret; - - cfhsi = container_of(work, struct cfhsi, wake_up_work); - - if (test_bit(CFHSI_SHUTDOWN, &cfhsi->bits)) - return; - - if (unlikely(test_bit(CFHSI_AWAKE, &cfhsi->bits))) { - /* It happenes when wakeup is requested by - * both ends at the same time. */ - clear_bit(CFHSI_WAKE_UP, &cfhsi->bits); - clear_bit(CFHSI_WAKE_UP_ACK, &cfhsi->bits); - return; - } - - /* Activate wake line. */ - cfhsi->ops->cfhsi_wake_up(cfhsi->ops); - - netdev_dbg(cfhsi->ndev, "%s: Start waiting.\n", - __func__); - - /* Wait for acknowledge. */ - ret = CFHSI_WAKE_TOUT; - ret = wait_event_interruptible_timeout(cfhsi->wake_up_wait, - test_and_clear_bit(CFHSI_WAKE_UP_ACK, - &cfhsi->bits), ret); - if (unlikely(ret < 0)) { - /* Interrupted by signal. */ - netdev_err(cfhsi->ndev, "%s: Signalled: %ld.\n", - __func__, ret); - - clear_bit(CFHSI_WAKE_UP, &cfhsi->bits); - cfhsi->ops->cfhsi_wake_down(cfhsi->ops); - return; - } else if (!ret) { - bool ca_wake = false; - size_t fifo_occupancy = 0; - - /* Wakeup timeout */ - netdev_dbg(cfhsi->ndev, "%s: Timeout.\n", - __func__); - - /* Check FIFO to check if modem has sent something. */ - WARN_ON(cfhsi->ops->cfhsi_fifo_occupancy(cfhsi->ops, - &fifo_occupancy)); - - netdev_dbg(cfhsi->ndev, "%s: Bytes in FIFO: %u.\n", - __func__, (unsigned) fifo_occupancy); - - /* Check if we misssed the interrupt. */ - WARN_ON(cfhsi->ops->cfhsi_get_peer_wake(cfhsi->ops, - &ca_wake)); - - if (ca_wake) { - netdev_err(cfhsi->ndev, "%s: CA Wake missed !.\n", - __func__); - - /* Clear the CFHSI_WAKE_UP_ACK bit to prevent race. */ - clear_bit(CFHSI_WAKE_UP_ACK, &cfhsi->bits); - - /* Continue execution. */ - goto wake_ack; - } - - clear_bit(CFHSI_WAKE_UP, &cfhsi->bits); - cfhsi->ops->cfhsi_wake_down(cfhsi->ops); - return; - } -wake_ack: - netdev_dbg(cfhsi->ndev, "%s: Woken.\n", - __func__); - - /* Clear power up bit. */ - set_bit(CFHSI_AWAKE, &cfhsi->bits); - clear_bit(CFHSI_WAKE_UP, &cfhsi->bits); - - /* Resume read operation. */ - netdev_dbg(cfhsi->ndev, "%s: Start RX.\n", __func__); - res = cfhsi->ops->cfhsi_rx(cfhsi->rx_ptr, cfhsi->rx_len, cfhsi->ops); - - if (WARN_ON(res < 0)) - netdev_err(cfhsi->ndev, "%s: RX err %d.\n", __func__, res); - - /* Clear power up acknowledment. */ - clear_bit(CFHSI_WAKE_UP_ACK, &cfhsi->bits); - - spin_lock_bh(&cfhsi->lock); - - /* Resume transmit if queues are not empty. */ - if (!cfhsi_tx_queue_len(cfhsi)) { - netdev_dbg(cfhsi->ndev, "%s: Peer wake, start timer.\n", - __func__); - /* Start inactivity timer. */ - mod_timer(&cfhsi->inactivity_timer, - jiffies + cfhsi->cfg.inactivity_timeout); - spin_unlock_bh(&cfhsi->lock); - return; - } - - netdev_dbg(cfhsi->ndev, "%s: Host wake.\n", - __func__); - - spin_unlock_bh(&cfhsi->lock); - - /* Create HSI frame. */ - len = cfhsi_tx_frm((struct cfhsi_desc *)cfhsi->tx_buf, cfhsi); - - if (likely(len > 0)) { - /* Set up new transfer. */ - res = cfhsi->ops->cfhsi_tx(cfhsi->tx_buf, len, cfhsi->ops); - if (WARN_ON(res < 0)) { - netdev_err(cfhsi->ndev, "%s: TX error %d.\n", - __func__, res); - cfhsi_abort_tx(cfhsi); - } - } else { - netdev_err(cfhsi->ndev, - "%s: Failed to create HSI frame: %d.\n", - __func__, len); - } -} - -static void cfhsi_wake_down(struct work_struct *work) -{ - long ret; - struct cfhsi *cfhsi = NULL; - size_t fifo_occupancy = 0; - int retry = CFHSI_WAKE_TOUT; - - cfhsi = container_of(work, struct cfhsi, wake_down_work); - netdev_dbg(cfhsi->ndev, "%s.\n", __func__); - - if (test_bit(CFHSI_SHUTDOWN, &cfhsi->bits)) - return; - - /* Deactivate wake line. */ - cfhsi->ops->cfhsi_wake_down(cfhsi->ops); - - /* Wait for acknowledge. */ - ret = CFHSI_WAKE_TOUT; - ret = wait_event_interruptible_timeout(cfhsi->wake_down_wait, - test_and_clear_bit(CFHSI_WAKE_DOWN_ACK, - &cfhsi->bits), ret); - if (ret < 0) { - /* Interrupted by signal. */ - netdev_err(cfhsi->ndev, "%s: Signalled: %ld.\n", - __func__, ret); - return; - } else if (!ret) { - bool ca_wake = true; - - /* Timeout */ - netdev_err(cfhsi->ndev, "%s: Timeout.\n", __func__); - - /* Check if we misssed the interrupt. */ - WARN_ON(cfhsi->ops->cfhsi_get_peer_wake(cfhsi->ops, - &ca_wake)); - if (!ca_wake) - netdev_err(cfhsi->ndev, "%s: CA Wake missed !.\n", - __func__); - } - - /* Check FIFO occupancy. */ - while (retry) { - WARN_ON(cfhsi->ops->cfhsi_fifo_occupancy(cfhsi->ops, - &fifo_occupancy)); - - if (!fifo_occupancy) - break; - - set_current_state(TASK_INTERRUPTIBLE); - schedule_timeout(1); - retry--; - } - - if (!retry) - netdev_err(cfhsi->ndev, "%s: FIFO Timeout.\n", __func__); - - /* Clear AWAKE condition. */ - clear_bit(CFHSI_AWAKE, &cfhsi->bits); - - /* Cancel pending RX requests. */ - cfhsi->ops->cfhsi_rx_cancel(cfhsi->ops); -} - -static void cfhsi_out_of_sync(struct work_struct *work) -{ - struct cfhsi *cfhsi = NULL; - - cfhsi = container_of(work, struct cfhsi, out_of_sync_work); - - rtnl_lock(); - dev_close(cfhsi->ndev); - rtnl_unlock(); -} - -static void cfhsi_wake_up_cb(struct cfhsi_cb_ops *cb_ops) -{ - struct cfhsi *cfhsi = NULL; - - cfhsi = container_of(cb_ops, struct cfhsi, cb_ops); - netdev_dbg(cfhsi->ndev, "%s.\n", - __func__); - - set_bit(CFHSI_WAKE_UP_ACK, &cfhsi->bits); - wake_up_interruptible(&cfhsi->wake_up_wait); - - if (test_bit(CFHSI_SHUTDOWN, &cfhsi->bits)) - return; - - /* Schedule wake up work queue if the peer initiates. */ - if (!test_and_set_bit(CFHSI_WAKE_UP, &cfhsi->bits)) - queue_work(cfhsi->wq, &cfhsi->wake_up_work); -} - -static void cfhsi_wake_down_cb(struct cfhsi_cb_ops *cb_ops) -{ - struct cfhsi *cfhsi = NULL; - - cfhsi = container_of(cb_ops, struct cfhsi, cb_ops); - netdev_dbg(cfhsi->ndev, "%s.\n", - __func__); - - /* Initiating low power is only permitted by the host (us). */ - set_bit(CFHSI_WAKE_DOWN_ACK, &cfhsi->bits); - wake_up_interruptible(&cfhsi->wake_down_wait); -} - -static void cfhsi_aggregation_tout(struct timer_list *t) -{ - struct cfhsi *cfhsi = from_timer(cfhsi, t, aggregation_timer); - - netdev_dbg(cfhsi->ndev, "%s.\n", - __func__); - - cfhsi_start_tx(cfhsi); -} - -static netdev_tx_t cfhsi_xmit(struct sk_buff *skb, struct net_device *dev) -{ - struct cfhsi *cfhsi = NULL; - int start_xfer = 0; - int timer_active; - int prio; - - if (!dev) - return -EINVAL; - - cfhsi = netdev_priv(dev); - - switch (skb->priority) { - case TC_PRIO_BESTEFFORT: - case TC_PRIO_FILLER: - case TC_PRIO_BULK: - prio = CFHSI_PRIO_BEBK; - break; - case TC_PRIO_INTERACTIVE_BULK: - prio = CFHSI_PRIO_VI; - break; - case TC_PRIO_INTERACTIVE: - prio = CFHSI_PRIO_VO; - break; - case TC_PRIO_CONTROL: - default: - prio = CFHSI_PRIO_CTL; - break; - } - - spin_lock_bh(&cfhsi->lock); - - /* Update aggregation statistics */ - cfhsi_update_aggregation_stats(cfhsi, skb, 1); - - /* Queue the SKB */ - skb_queue_tail(&cfhsi->qhead[prio], skb); - - /* Sanity check; xmit should not be called after unregister_netdev */ - if (WARN_ON(test_bit(CFHSI_SHUTDOWN, &cfhsi->bits))) { - spin_unlock_bh(&cfhsi->lock); - cfhsi_abort_tx(cfhsi); - return -EINVAL; - } - - /* Send flow off if number of packets is above high water mark. */ - if (!cfhsi->flow_off_sent && - cfhsi_tx_queue_len(cfhsi) > cfhsi->cfg.q_high_mark && - cfhsi->cfdev.flowctrl) { - cfhsi->flow_off_sent = 1; - cfhsi->cfdev.flowctrl(cfhsi->ndev, OFF); - } - - if (cfhsi->tx_state == CFHSI_TX_STATE_IDLE) { - cfhsi->tx_state = CFHSI_TX_STATE_XFER; - start_xfer = 1; - } - - if (!start_xfer) { - /* Send aggregate if it is possible */ - bool aggregate_ready = - cfhsi_can_send_aggregate(cfhsi) && - del_timer(&cfhsi->aggregation_timer) > 0; - spin_unlock_bh(&cfhsi->lock); - if (aggregate_ready) - cfhsi_start_tx(cfhsi); - return NETDEV_TX_OK; - } - - /* Delete inactivity timer if started. */ - timer_active = del_timer_sync(&cfhsi->inactivity_timer); - - spin_unlock_bh(&cfhsi->lock); - - if (timer_active) { - struct cfhsi_desc *desc = (struct cfhsi_desc *)cfhsi->tx_buf; - int len; - int res; - - /* Create HSI frame. */ - len = cfhsi_tx_frm(desc, cfhsi); - WARN_ON(!len); - - /* Set up new transfer. */ - res = cfhsi->ops->cfhsi_tx(cfhsi->tx_buf, len, cfhsi->ops); - if (WARN_ON(res < 0)) { - netdev_err(cfhsi->ndev, "%s: TX error %d.\n", - __func__, res); - cfhsi_abort_tx(cfhsi); - } - } else { - /* Schedule wake up work queue if the we initiate. */ - if (!test_and_set_bit(CFHSI_WAKE_UP, &cfhsi->bits)) - queue_work(cfhsi->wq, &cfhsi->wake_up_work); - } - - return NETDEV_TX_OK; -} - -static const struct net_device_ops cfhsi_netdevops; - -static void cfhsi_setup(struct net_device *dev) -{ - int i; - struct cfhsi *cfhsi = netdev_priv(dev); - dev->features = 0; - dev->type = ARPHRD_CAIF; - dev->flags = IFF_POINTOPOINT | IFF_NOARP; - dev->mtu = CFHSI_MAX_CAIF_FRAME_SZ; - dev->priv_flags |= IFF_NO_QUEUE; - dev->needs_free_netdev = true; - dev->netdev_ops = &cfhsi_netdevops; - for (i = 0; i < CFHSI_PRIO_LAST; ++i) - skb_queue_head_init(&cfhsi->qhead[i]); - cfhsi->cfdev.link_select = CAIF_LINK_HIGH_BANDW; - cfhsi->cfdev.use_frag = false; - cfhsi->cfdev.use_stx = false; - cfhsi->cfdev.use_fcs = false; - cfhsi->ndev = dev; - cfhsi->cfg = hsi_default_config; -} - -static int cfhsi_open(struct net_device *ndev) -{ - struct cfhsi *cfhsi = netdev_priv(ndev); - int res; - - clear_bit(CFHSI_SHUTDOWN, &cfhsi->bits); - - /* Initialize state vaiables. */ - cfhsi->tx_state = CFHSI_TX_STATE_IDLE; - cfhsi->rx_state.state = CFHSI_RX_STATE_DESC; - - /* Set flow info */ - cfhsi->flow_off_sent = 0; - - /* - * Allocate a TX buffer with the size of a HSI packet descriptors - * and the necessary room for CAIF payload frames. - */ - cfhsi->tx_buf = kzalloc(CFHSI_BUF_SZ_TX, GFP_KERNEL); - if (!cfhsi->tx_buf) { - res = -ENODEV; - goto err_alloc_tx; - } - - /* - * Allocate a RX buffer with the size of two HSI packet descriptors and - * the necessary room for CAIF payload frames. - */ - cfhsi->rx_buf = kzalloc(CFHSI_BUF_SZ_RX, GFP_KERNEL); - if (!cfhsi->rx_buf) { - res = -ENODEV; - goto err_alloc_rx; - } - - cfhsi->rx_flip_buf = kzalloc(CFHSI_BUF_SZ_RX, GFP_KERNEL); - if (!cfhsi->rx_flip_buf) { - res = -ENODEV; - goto err_alloc_rx_flip; - } - - /* Initialize aggregation timeout */ - cfhsi->cfg.aggregation_timeout = hsi_default_config.aggregation_timeout; - - /* Initialize recieve vaiables. */ - cfhsi->rx_ptr = cfhsi->rx_buf; - cfhsi->rx_len = CFHSI_DESC_SZ; - - /* Initialize spin locks. */ - spin_lock_init(&cfhsi->lock); - - /* Set up the driver. */ - cfhsi->cb_ops.tx_done_cb = cfhsi_tx_done_cb; - cfhsi->cb_ops.rx_done_cb = cfhsi_rx_done_cb; - cfhsi->cb_ops.wake_up_cb = cfhsi_wake_up_cb; - cfhsi->cb_ops.wake_down_cb = cfhsi_wake_down_cb; - - /* Initialize the work queues. */ - INIT_WORK(&cfhsi->wake_up_work, cfhsi_wake_up); - INIT_WORK(&cfhsi->wake_down_work, cfhsi_wake_down); - INIT_WORK(&cfhsi->out_of_sync_work, cfhsi_out_of_sync); - - /* Clear all bit fields. */ - clear_bit(CFHSI_WAKE_UP_ACK, &cfhsi->bits); - clear_bit(CFHSI_WAKE_DOWN_ACK, &cfhsi->bits); - clear_bit(CFHSI_WAKE_UP, &cfhsi->bits); - clear_bit(CFHSI_AWAKE, &cfhsi->bits); - - /* Create work thread. */ - cfhsi->wq = alloc_ordered_workqueue(cfhsi->ndev->name, WQ_MEM_RECLAIM); - if (!cfhsi->wq) { - netdev_err(cfhsi->ndev, "%s: Failed to create work queue.\n", - __func__); - res = -ENODEV; - goto err_create_wq; - } - - /* Initialize wait queues. */ - init_waitqueue_head(&cfhsi->wake_up_wait); - init_waitqueue_head(&cfhsi->wake_down_wait); - init_waitqueue_head(&cfhsi->flush_fifo_wait); - - /* Setup the inactivity timer. */ - timer_setup(&cfhsi->inactivity_timer, cfhsi_inactivity_tout, 0); - /* Setup the slowpath RX timer. */ - timer_setup(&cfhsi->rx_slowpath_timer, cfhsi_rx_slowpath, 0); - /* Setup the aggregation timer. */ - timer_setup(&cfhsi->aggregation_timer, cfhsi_aggregation_tout, 0); - - /* Activate HSI interface. */ - res = cfhsi->ops->cfhsi_up(cfhsi->ops); - if (res) { - netdev_err(cfhsi->ndev, - "%s: can't activate HSI interface: %d.\n", - __func__, res); - goto err_activate; - } - - /* Flush FIFO */ - res = cfhsi_flush_fifo(cfhsi); - if (res) { - netdev_err(cfhsi->ndev, "%s: Can't flush FIFO: %d.\n", - __func__, res); - goto err_net_reg; - } - return res; - - err_net_reg: - cfhsi->ops->cfhsi_down(cfhsi->ops); - err_activate: - destroy_workqueue(cfhsi->wq); - err_create_wq: - kfree(cfhsi->rx_flip_buf); - err_alloc_rx_flip: - kfree(cfhsi->rx_buf); - err_alloc_rx: - kfree(cfhsi->tx_buf); - err_alloc_tx: - return res; -} - -static int cfhsi_close(struct net_device *ndev) -{ - struct cfhsi *cfhsi = netdev_priv(ndev); - u8 *tx_buf, *rx_buf, *flip_buf; - - /* going to shutdown driver */ - set_bit(CFHSI_SHUTDOWN, &cfhsi->bits); - - /* Delete timers if pending */ - del_timer_sync(&cfhsi->inactivity_timer); - del_timer_sync(&cfhsi->rx_slowpath_timer); - del_timer_sync(&cfhsi->aggregation_timer); - - /* Cancel pending RX request (if any) */ - cfhsi->ops->cfhsi_rx_cancel(cfhsi->ops); - - /* Destroy workqueue */ - destroy_workqueue(cfhsi->wq); - - /* Store bufferes: will be freed later. */ - tx_buf = cfhsi->tx_buf; - rx_buf = cfhsi->rx_buf; - flip_buf = cfhsi->rx_flip_buf; - /* Flush transmit queues. */ - cfhsi_abort_tx(cfhsi); - - /* Deactivate interface */ - cfhsi->ops->cfhsi_down(cfhsi->ops); - - /* Free buffers. */ - kfree(tx_buf); - kfree(rx_buf); - kfree(flip_buf); - return 0; -} - -static void cfhsi_uninit(struct net_device *dev) -{ - struct cfhsi *cfhsi = netdev_priv(dev); - ASSERT_RTNL(); - symbol_put(cfhsi_get_device); - list_del(&cfhsi->list); -} - -static const struct net_device_ops cfhsi_netdevops = { - .ndo_uninit = cfhsi_uninit, - .ndo_open = cfhsi_open, - .ndo_stop = cfhsi_close, - .ndo_start_xmit = cfhsi_xmit -}; - -static void cfhsi_netlink_parms(struct nlattr *data[], struct cfhsi *cfhsi) -{ - int i; - - if (!data) { - pr_debug("no params data found\n"); - return; - } - - i = __IFLA_CAIF_HSI_INACTIVITY_TOUT; - /* - * Inactivity timeout in millisecs. Lowest possible value is 1, - * and highest possible is NEXT_TIMER_MAX_DELTA. - */ - if (data[i]) { - u32 inactivity_timeout = nla_get_u32(data[i]); - /* Pre-calculate inactivity timeout. */ - cfhsi->cfg.inactivity_timeout = inactivity_timeout * HZ / 1000; - if (cfhsi->cfg.inactivity_timeout == 0) - cfhsi->cfg.inactivity_timeout = 1; - else if (cfhsi->cfg.inactivity_timeout > NEXT_TIMER_MAX_DELTA) - cfhsi->cfg.inactivity_timeout = NEXT_TIMER_MAX_DELTA; - } - - i = __IFLA_CAIF_HSI_AGGREGATION_TOUT; - if (data[i]) - cfhsi->cfg.aggregation_timeout = nla_get_u32(data[i]); - - i = __IFLA_CAIF_HSI_HEAD_ALIGN; - if (data[i]) - cfhsi->cfg.head_align = nla_get_u32(data[i]); - - i = __IFLA_CAIF_HSI_TAIL_ALIGN; - if (data[i]) - cfhsi->cfg.tail_align = nla_get_u32(data[i]); - - i = __IFLA_CAIF_HSI_QHIGH_WATERMARK; - if (data[i]) - cfhsi->cfg.q_high_mark = nla_get_u32(data[i]); - - i = __IFLA_CAIF_HSI_QLOW_WATERMARK; - if (data[i]) - cfhsi->cfg.q_low_mark = nla_get_u32(data[i]); -} - -static int caif_hsi_changelink(struct net_device *dev, struct nlattr *tb[], - struct nlattr *data[], - struct netlink_ext_ack *extack) -{ - cfhsi_netlink_parms(data, netdev_priv(dev)); - netdev_state_change(dev); - return 0; -} - -static const struct nla_policy caif_hsi_policy[__IFLA_CAIF_HSI_MAX + 1] = { - [__IFLA_CAIF_HSI_INACTIVITY_TOUT] = { .type = NLA_U32, .len = 4 }, - [__IFLA_CAIF_HSI_AGGREGATION_TOUT] = { .type = NLA_U32, .len = 4 }, - [__IFLA_CAIF_HSI_HEAD_ALIGN] = { .type = NLA_U32, .len = 4 }, - [__IFLA_CAIF_HSI_TAIL_ALIGN] = { .type = NLA_U32, .len = 4 }, - [__IFLA_CAIF_HSI_QHIGH_WATERMARK] = { .type = NLA_U32, .len = 4 }, - [__IFLA_CAIF_HSI_QLOW_WATERMARK] = { .type = NLA_U32, .len = 4 }, -}; - -static size_t caif_hsi_get_size(const struct net_device *dev) -{ - int i; - size_t s = 0; - for (i = __IFLA_CAIF_HSI_UNSPEC + 1; i < __IFLA_CAIF_HSI_MAX; i++) - s += nla_total_size(caif_hsi_policy[i].len); - return s; -} - -static int caif_hsi_fill_info(struct sk_buff *skb, const struct net_device *dev) -{ - struct cfhsi *cfhsi = netdev_priv(dev); - - if (nla_put_u32(skb, __IFLA_CAIF_HSI_INACTIVITY_TOUT, - cfhsi->cfg.inactivity_timeout) || - nla_put_u32(skb, __IFLA_CAIF_HSI_AGGREGATION_TOUT, - cfhsi->cfg.aggregation_timeout) || - nla_put_u32(skb, __IFLA_CAIF_HSI_HEAD_ALIGN, - cfhsi->cfg.head_align) || - nla_put_u32(skb, __IFLA_CAIF_HSI_TAIL_ALIGN, - cfhsi->cfg.tail_align) || - nla_put_u32(skb, __IFLA_CAIF_HSI_QHIGH_WATERMARK, - cfhsi->cfg.q_high_mark) || - nla_put_u32(skb, __IFLA_CAIF_HSI_QLOW_WATERMARK, - cfhsi->cfg.q_low_mark)) - return -EMSGSIZE; - - return 0; -} - -static int caif_hsi_newlink(struct net *src_net, struct net_device *dev, - struct nlattr *tb[], struct nlattr *data[], - struct netlink_ext_ack *extack) -{ - struct cfhsi *cfhsi = NULL; - struct cfhsi_ops *(*get_ops)(void); - - ASSERT_RTNL(); - - cfhsi = netdev_priv(dev); - cfhsi_netlink_parms(data, cfhsi); - - get_ops = symbol_get(cfhsi_get_ops); - if (!get_ops) { - pr_err("%s: failed to get the cfhsi_ops\n", __func__); - return -ENODEV; - } - - /* Assign the HSI device. */ - cfhsi->ops = (*get_ops)(); - if (!cfhsi->ops) { - pr_err("%s: failed to get the cfhsi_ops\n", __func__); - goto err; - } - - /* Assign the driver to this HSI device. */ - cfhsi->ops->cb_ops = &cfhsi->cb_ops; - if (register_netdevice(dev)) { - pr_warn("%s: caif_hsi device registration failed\n", __func__); - goto err; - } - /* Add CAIF HSI device to list. */ - list_add_tail(&cfhsi->list, &cfhsi_list); - - return 0; -err: - symbol_put(cfhsi_get_ops); - return -ENODEV; -} - -static struct rtnl_link_ops caif_hsi_link_ops __read_mostly = { - .kind = "cfhsi", - .priv_size = sizeof(struct cfhsi), - .setup = cfhsi_setup, - .maxtype = __IFLA_CAIF_HSI_MAX, - .policy = caif_hsi_policy, - .newlink = caif_hsi_newlink, - .changelink = caif_hsi_changelink, - .get_size = caif_hsi_get_size, - .fill_info = caif_hsi_fill_info, -}; - -static void __exit cfhsi_exit_module(void) -{ - struct list_head *list_node; - struct list_head *n; - struct cfhsi *cfhsi; - - rtnl_link_unregister(&caif_hsi_link_ops); - - rtnl_lock(); - list_for_each_safe(list_node, n, &cfhsi_list) { - cfhsi = list_entry(list_node, struct cfhsi, list); - unregister_netdevice(cfhsi->ndev); - } - rtnl_unlock(); -} - -static int __init cfhsi_init_module(void) -{ - return rtnl_link_register(&caif_hsi_link_ops); -} - -module_init(cfhsi_init_module); -module_exit(cfhsi_exit_module); diff --git a/include/net/caif/caif_hsi.h b/include/net/caif/caif_hsi.h deleted file mode 100644 index 552cf68d28d2c3..00000000000000 --- a/include/net/caif/caif_hsi.h +++ /dev/null @@ -1,200 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0-only */ -/* - * Copyright (C) ST-Ericsson AB 2010 - * Author: Daniel Martensson / daniel.martensson@stericsson.com - * Dmitry.Tarnyagin / dmitry.tarnyagin@stericsson.com - */ - -#ifndef CAIF_HSI_H_ -#define CAIF_HSI_H_ - -#include -#include -#include - -/* - * Maximum number of CAIF frames that can reside in the same HSI frame. - */ -#define CFHSI_MAX_PKTS 15 - -/* - * Maximum number of bytes used for the frame that can be embedded in the - * HSI descriptor. - */ -#define CFHSI_MAX_EMB_FRM_SZ 96 - -/* - * Decides if HSI buffers should be prefilled with 0xFF pattern for easier - * debugging. Both TX and RX buffers will be filled before the transfer. - */ -#define CFHSI_DBG_PREFILL 0 - -/* Structure describing a HSI packet descriptor. */ -#pragma pack(1) /* Byte alignment. */ -struct cfhsi_desc { - u8 header; - u8 offset; - u16 cffrm_len[CFHSI_MAX_PKTS]; - u8 emb_frm[CFHSI_MAX_EMB_FRM_SZ]; -}; -#pragma pack() /* Default alignment. */ - -/* Size of the complete HSI packet descriptor. */ -#define CFHSI_DESC_SZ (sizeof(struct cfhsi_desc)) - -/* - * Size of the complete HSI packet descriptor excluding the optional embedded - * CAIF frame. - */ -#define CFHSI_DESC_SHORT_SZ (CFHSI_DESC_SZ - CFHSI_MAX_EMB_FRM_SZ) - -/* - * Maximum bytes transferred in one transfer. - */ -#define CFHSI_MAX_CAIF_FRAME_SZ 4096 - -#define CFHSI_MAX_PAYLOAD_SZ (CFHSI_MAX_PKTS * CFHSI_MAX_CAIF_FRAME_SZ) - -/* Size of the complete HSI TX buffer. */ -#define CFHSI_BUF_SZ_TX (CFHSI_DESC_SZ + CFHSI_MAX_PAYLOAD_SZ) - -/* Size of the complete HSI RX buffer. */ -#define CFHSI_BUF_SZ_RX ((2 * CFHSI_DESC_SZ) + CFHSI_MAX_PAYLOAD_SZ) - -/* Bitmasks for the HSI descriptor. */ -#define CFHSI_PIGGY_DESC (0x01 << 7) - -#define CFHSI_TX_STATE_IDLE 0 -#define CFHSI_TX_STATE_XFER 1 - -#define CFHSI_RX_STATE_DESC 0 -#define CFHSI_RX_STATE_PAYLOAD 1 - -/* Bitmasks for power management. */ -#define CFHSI_WAKE_UP 0 -#define CFHSI_WAKE_UP_ACK 1 -#define CFHSI_WAKE_DOWN_ACK 2 -#define CFHSI_AWAKE 3 -#define CFHSI_WAKELOCK_HELD 4 -#define CFHSI_SHUTDOWN 5 -#define CFHSI_FLUSH_FIFO 6 - -#ifndef CFHSI_INACTIVITY_TOUT -#define CFHSI_INACTIVITY_TOUT (1 * HZ) -#endif /* CFHSI_INACTIVITY_TOUT */ - -#ifndef CFHSI_WAKE_TOUT -#define CFHSI_WAKE_TOUT (3 * HZ) -#endif /* CFHSI_WAKE_TOUT */ - -#ifndef CFHSI_MAX_RX_RETRIES -#define CFHSI_MAX_RX_RETRIES (10 * HZ) -#endif - -/* Structure implemented by the CAIF HSI driver. */ -struct cfhsi_cb_ops { - void (*tx_done_cb) (struct cfhsi_cb_ops *drv); - void (*rx_done_cb) (struct cfhsi_cb_ops *drv); - void (*wake_up_cb) (struct cfhsi_cb_ops *drv); - void (*wake_down_cb) (struct cfhsi_cb_ops *drv); -}; - -/* Structure implemented by HSI device. */ -struct cfhsi_ops { - int (*cfhsi_up) (struct cfhsi_ops *dev); - int (*cfhsi_down) (struct cfhsi_ops *dev); - int (*cfhsi_tx) (u8 *ptr, int len, struct cfhsi_ops *dev); - int (*cfhsi_rx) (u8 *ptr, int len, struct cfhsi_ops *dev); - int (*cfhsi_wake_up) (struct cfhsi_ops *dev); - int (*cfhsi_wake_down) (struct cfhsi_ops *dev); - int (*cfhsi_get_peer_wake) (struct cfhsi_ops *dev, bool *status); - int (*cfhsi_fifo_occupancy) (struct cfhsi_ops *dev, size_t *occupancy); - int (*cfhsi_rx_cancel)(struct cfhsi_ops *dev); - struct cfhsi_cb_ops *cb_ops; -}; - -/* Structure holds status of received CAIF frames processing */ -struct cfhsi_rx_state { - int state; - int nfrms; - int pld_len; - int retries; - bool piggy_desc; -}; - -/* Priority mapping */ -enum { - CFHSI_PRIO_CTL = 0, - CFHSI_PRIO_VI, - CFHSI_PRIO_VO, - CFHSI_PRIO_BEBK, - CFHSI_PRIO_LAST, -}; - -struct cfhsi_config { - u32 inactivity_timeout; - u32 aggregation_timeout; - u32 head_align; - u32 tail_align; - u32 q_high_mark; - u32 q_low_mark; -}; - -/* Structure implemented by CAIF HSI drivers. */ -struct cfhsi { - struct caif_dev_common cfdev; - struct net_device *ndev; - struct platform_device *pdev; - struct sk_buff_head qhead[CFHSI_PRIO_LAST]; - struct cfhsi_cb_ops cb_ops; - struct cfhsi_ops *ops; - int tx_state; - struct cfhsi_rx_state rx_state; - struct cfhsi_config cfg; - int rx_len; - u8 *rx_ptr; - u8 *tx_buf; - u8 *rx_buf; - u8 *rx_flip_buf; - spinlock_t lock; - int flow_off_sent; - struct list_head list; - struct work_struct wake_up_work; - struct work_struct wake_down_work; - struct work_struct out_of_sync_work; - struct workqueue_struct *wq; - wait_queue_head_t wake_up_wait; - wait_queue_head_t wake_down_wait; - wait_queue_head_t flush_fifo_wait; - struct timer_list inactivity_timer; - struct timer_list rx_slowpath_timer; - - /* TX aggregation */ - int aggregation_len; - struct timer_list aggregation_timer; - - unsigned long bits; -}; -extern struct platform_driver cfhsi_driver; - -/** - * enum ifla_caif_hsi - CAIF HSI NetlinkRT parameters. - * @IFLA_CAIF_HSI_INACTIVITY_TOUT: Inactivity timeout before - * taking the HSI wakeline down, in milliseconds. - * When using RT Netlink to create, destroy or configure a CAIF HSI interface, - * enum ifla_caif_hsi is used to specify the configuration attributes. - */ -enum ifla_caif_hsi { - __IFLA_CAIF_HSI_UNSPEC, - __IFLA_CAIF_HSI_INACTIVITY_TOUT, - __IFLA_CAIF_HSI_AGGREGATION_TOUT, - __IFLA_CAIF_HSI_HEAD_ALIGN, - __IFLA_CAIF_HSI_TAIL_ALIGN, - __IFLA_CAIF_HSI_QHIGH_WATERMARK, - __IFLA_CAIF_HSI_QLOW_WATERMARK, - __IFLA_CAIF_HSI_MAX -}; - -struct cfhsi_ops *cfhsi_get_ops(void); - -#endif /* CAIF_HSI_H_ */ From 71158bb1f2d2da61385c58fc1114e1a1c19984ba Mon Sep 17 00:00:00 2001 From: Paolo Abeni Date: Wed, 30 Jun 2021 13:42:13 +0200 Subject: [PATCH 078/714] tcp: consistently disable header prediction for mptcp The MPTCP receive path is hooked only into the TCP slow-path. The DSS presence allows plain MPTCP traffic to hit that consistently. Since commit e1ff9e82e2ea ("net: mptcp: improve fallback to TCP"), when an MPTCP socket falls back to TCP, it can hit the TCP receive fast-path, and delay or stop triggering the event notification. Address the issue explicitly disabling the header prediction for MPTCP sockets. Closes: https://github.com/multipath-tcp/mptcp_net-next/issues/200 Fixes: e1ff9e82e2ea ("net: mptcp: improve fallback to TCP") Signed-off-by: Paolo Abeni Reviewed-by: Eric Dumazet Signed-off-by: David S. Miller --- include/net/tcp.h | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/include/net/tcp.h b/include/net/tcp.h index e668f1bf780d3e..17df9b047ee46d 100644 --- a/include/net/tcp.h +++ b/include/net/tcp.h @@ -686,6 +686,10 @@ static inline u32 __tcp_set_rto(const struct tcp_sock *tp) static inline void __tcp_fast_path_on(struct tcp_sock *tp, u32 snd_wnd) { + /* mptcp hooks are only on the slow path */ + if (sk_is_mptcp((struct sock *)tp)) + return; + tp->pred_flags = htonl((tp->tcp_header_len << 26) | ntohl(TCP_FLAG_ACK) | snd_wnd); From 18a419bad63b7f68a1979e28459782518e7b6bbe Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Wed, 30 Jun 2021 09:42:44 -0700 Subject: [PATCH 079/714] udp: annotate data races around unix_sk(sk)->gso_size Accesses to unix_sk(sk)->gso_size are lockless. Add READ_ONCE()/WRITE_ONCE() around them. BUG: KCSAN: data-race in udp_lib_setsockopt / udpv6_sendmsg write to 0xffff88812d78f47c of 2 bytes by task 10849 on cpu 1: udp_lib_setsockopt+0x3b3/0x710 net/ipv4/udp.c:2696 udpv6_setsockopt+0x63/0x90 net/ipv6/udp.c:1630 sock_common_setsockopt+0x5d/0x70 net/core/sock.c:3265 __sys_setsockopt+0x18f/0x200 net/socket.c:2104 __do_sys_setsockopt net/socket.c:2115 [inline] __se_sys_setsockopt net/socket.c:2112 [inline] __x64_sys_setsockopt+0x62/0x70 net/socket.c:2112 do_syscall_64+0x4a/0x90 arch/x86/entry/common.c:47 entry_SYSCALL_64_after_hwframe+0x44/0xae read to 0xffff88812d78f47c of 2 bytes by task 10852 on cpu 0: udpv6_sendmsg+0x161/0x16b0 net/ipv6/udp.c:1299 inet6_sendmsg+0x5f/0x80 net/ipv6/af_inet6.c:642 sock_sendmsg_nosec net/socket.c:654 [inline] sock_sendmsg net/socket.c:674 [inline] ____sys_sendmsg+0x360/0x4d0 net/socket.c:2337 ___sys_sendmsg net/socket.c:2391 [inline] __sys_sendmmsg+0x315/0x4b0 net/socket.c:2477 __do_sys_sendmmsg net/socket.c:2506 [inline] __se_sys_sendmmsg net/socket.c:2503 [inline] __x64_sys_sendmmsg+0x53/0x60 net/socket.c:2503 do_syscall_64+0x4a/0x90 arch/x86/entry/common.c:47 entry_SYSCALL_64_after_hwframe+0x44/0xae value changed: 0x0000 -> 0x0005 Reported by Kernel Concurrency Sanitizer on: CPU: 0 PID: 10852 Comm: syz-executor.0 Not tainted 5.13.0-syzkaller #0 Hardware name: Google Google Compute Engine/Google Compute Engine, BIOS Google 01/01/2011 Fixes: bec1f6f69736 ("udp: generate gso with UDP_SEGMENT") Signed-off-by: Eric Dumazet Cc: Willem de Bruijn Reported-by: syzbot Signed-off-by: David S. Miller --- net/ipv4/udp.c | 6 +++--- net/ipv6/udp.c | 2 +- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/net/ipv4/udp.c b/net/ipv4/udp.c index 62682807b4b2b8..62cd4cd52e8481 100644 --- a/net/ipv4/udp.c +++ b/net/ipv4/udp.c @@ -1102,7 +1102,7 @@ int udp_sendmsg(struct sock *sk, struct msghdr *msg, size_t len) } ipcm_init_sk(&ipc, inet); - ipc.gso_size = up->gso_size; + ipc.gso_size = READ_ONCE(up->gso_size); if (msg->msg_controllen) { err = udp_cmsg_send(sk, msg, &ipc.gso_size); @@ -2695,7 +2695,7 @@ int udp_lib_setsockopt(struct sock *sk, int level, int optname, case UDP_SEGMENT: if (val < 0 || val > USHRT_MAX) return -EINVAL; - up->gso_size = val; + WRITE_ONCE(up->gso_size, val); break; case UDP_GRO: @@ -2790,7 +2790,7 @@ int udp_lib_getsockopt(struct sock *sk, int level, int optname, break; case UDP_SEGMENT: - val = up->gso_size; + val = READ_ONCE(up->gso_size); break; case UDP_GRO: diff --git a/net/ipv6/udp.c b/net/ipv6/udp.c index 368972dbd91961..0cc7ba531b3415 100644 --- a/net/ipv6/udp.c +++ b/net/ipv6/udp.c @@ -1296,7 +1296,7 @@ int udpv6_sendmsg(struct sock *sk, struct msghdr *msg, size_t len) int (*getfrag)(void *, char *, int, int, int, struct sk_buff *); ipcm6_init(&ipc6); - ipc6.gso_size = up->gso_size; + ipc6.gso_size = READ_ONCE(up->gso_size); ipc6.sockc.tsflags = sk->sk_tsflags; ipc6.sockc.mark = sk->sk_mark; From b6c8801038234a6d837dfc683d79676d4022ad79 Mon Sep 17 00:00:00 2001 From: Geert Uytterhoeven Date: Thu, 1 Jul 2021 14:02:20 +0200 Subject: [PATCH 080/714] ARM: dts: qcom-apq8060: Correct Ethernet node name and drop bogus irq property make dtbs_check: ethernet-ebi2@2,0: $nodename:0: 'ethernet-ebi2@2,0' does not match '^ethernet(@.*)?$' ethernet-ebi2@2,0: 'smsc,irq-active-low' does not match any of the regexes: 'pinctrl-[0-9]+' There is no "smsc,irq-active-low" property, as active low is the default. Signed-off-by: Geert Uytterhoeven Reviewed-by: Linus Walleij Signed-off-by: David S. Miller --- arch/arm/boot/dts/qcom-apq8060-dragonboard.dts | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/arch/arm/boot/dts/qcom-apq8060-dragonboard.dts b/arch/arm/boot/dts/qcom-apq8060-dragonboard.dts index dace8ffeb99118..0a4ffd10c48464 100644 --- a/arch/arm/boot/dts/qcom-apq8060-dragonboard.dts +++ b/arch/arm/boot/dts/qcom-apq8060-dragonboard.dts @@ -581,7 +581,7 @@ * EBI2. This has a 25MHz chrystal next to it, so no * clocking is needed. */ - ethernet-ebi2@2,0 { + ethernet@2,0 { compatible = "smsc,lan9221", "smsc,lan9115"; reg = <2 0x0 0x100>; /* @@ -598,8 +598,6 @@ phy-mode = "mii"; reg-io-width = <2>; smsc,force-external-phy; - /* IRQ on edge falling = active low */ - smsc,irq-active-low; smsc,irq-push-pull; /* From 19373d0233d04ede649eee4269922e20dd133cf2 Mon Sep 17 00:00:00 2001 From: Geert Uytterhoeven Date: Thu, 1 Jul 2021 14:02:21 +0200 Subject: [PATCH 081/714] dt-bindings: net: sms911x: Convert to json-schema Convert the Smart Mixed-Signal Connectivity (SMSC) LAN911x/912x Controller Device Tree binding documentation to json-schema. Document missing properties. Make "phy-mode" not required, as many DTS files do not have it, and the Linux drivers falls back to PHY_INTERFACE_MODE_NA. Correct nodename in example. Signed-off-by: Geert Uytterhoeven Signed-off-by: David S. Miller --- .../devicetree/bindings/net/gpmc-eth.txt | 2 +- .../devicetree/bindings/net/smsc,lan9115.yaml | 110 ++++++++++++++++++ .../devicetree/bindings/net/smsc911x.txt | 43 ------- 3 files changed, 111 insertions(+), 44 deletions(-) create mode 100644 Documentation/devicetree/bindings/net/smsc,lan9115.yaml delete mode 100644 Documentation/devicetree/bindings/net/smsc911x.txt diff --git a/Documentation/devicetree/bindings/net/gpmc-eth.txt b/Documentation/devicetree/bindings/net/gpmc-eth.txt index f7da3d73ca1b2e..32821066a85b00 100644 --- a/Documentation/devicetree/bindings/net/gpmc-eth.txt +++ b/Documentation/devicetree/bindings/net/gpmc-eth.txt @@ -13,7 +13,7 @@ Documentation/devicetree/bindings/memory-controllers/omap-gpmc.txt For the properties relevant to the ethernet controller connected to the GPMC refer to the binding documentation of the device. For example, the documentation -for the SMSC 911x is Documentation/devicetree/bindings/net/smsc911x.txt +for the SMSC 911x is Documentation/devicetree/bindings/net/smsc,lan9115.yaml Child nodes need to specify the GPMC bus address width using the "bank-width" property but is possible that an ethernet controller also has a property to diff --git a/Documentation/devicetree/bindings/net/smsc,lan9115.yaml b/Documentation/devicetree/bindings/net/smsc,lan9115.yaml new file mode 100644 index 00000000000000..f86667cbcca899 --- /dev/null +++ b/Documentation/devicetree/bindings/net/smsc,lan9115.yaml @@ -0,0 +1,110 @@ +# SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause) +%YAML 1.2 +--- +$id: http://devicetree.org/schemas/net/smsc,lan9115.yaml# +$schema: http://devicetree.org/meta-schemas/core.yaml# + +title: Smart Mixed-Signal Connectivity (SMSC) LAN911x/912x Controller + +maintainers: + - Shawn Guo + +allOf: + - $ref: ethernet-controller.yaml# + +properties: + compatible: + oneOf: + - const: smsc,lan9115 + - items: + - enum: + - smsc,lan89218 + - smsc,lan9117 + - smsc,lan9118 + - smsc,lan9220 + - smsc,lan9221 + - const: smsc,lan9115 + + reg: + maxItems: 1 + + reg-shift: true + + reg-io-width: + enum: [ 2, 4 ] + default: 2 + + interrupts: + minItems: 1 + items: + - description: + LAN interrupt line + - description: + Optional PME (power management event) interrupt that is able to wake + up the host system with a 50ms pulse on network activity + + clocks: + maxItems: 1 + + phy-mode: true + + smsc,irq-active-high: + type: boolean + description: Indicates the IRQ polarity is active-high + + smsc,irq-push-pull: + type: boolean + description: Indicates the IRQ type is push-pull + + smsc,force-internal-phy: + type: boolean + description: Forces SMSC LAN controller to use internal PHY + + smsc,force-external-phy: + type: boolean + description: Forces SMSC LAN controller to use external PHY + + smsc,save-mac-address: + type: boolean + description: + Indicates that MAC address needs to be saved before resetting the + controller + + reset-gpios: + maxItems: 1 + description: + A GPIO line connected to the RESET (active low) signal of the device. + On many systems this is wired high so the device goes out of reset at + power-on, but if it is under program control, this optional GPIO can + wake up in response to it. + + vdd33a-supply: + description: 3.3V analog power supply + + vddvario-supply: + description: IO logic power supply + +required: + - compatible + - reg + - interrupts + +# There are lots of bus-specific properties ("qcom,*", "samsung,*", "fsl,*", +# "gpmc,*", ...) to be found, that actually depend on the compatible value of +# the parent node. +additionalProperties: true + +examples: + - | + #include + + ethernet@f4000000 { + compatible = "smsc,lan9220", "smsc,lan9115"; + reg = <0xf4000000 0x2000000>; + phy-mode = "mii"; + interrupt-parent = <&gpio1>; + interrupts = <31>, <32>; + reset-gpios = <&gpio1 30 GPIO_ACTIVE_LOW>; + reg-io-width = <4>; + smsc,irq-push-pull; + }; diff --git a/Documentation/devicetree/bindings/net/smsc911x.txt b/Documentation/devicetree/bindings/net/smsc911x.txt deleted file mode 100644 index acfafc8e143c4c..00000000000000 --- a/Documentation/devicetree/bindings/net/smsc911x.txt +++ /dev/null @@ -1,43 +0,0 @@ -* Smart Mixed-Signal Connectivity (SMSC) LAN911x/912x Controller - -Required properties: -- compatible : Should be "smsc,lan", "smsc,lan9115" -- reg : Address and length of the io space for SMSC LAN -- interrupts : one or two interrupt specifiers - - The first interrupt is the SMSC LAN interrupt line - - The second interrupt (if present) is the PME (power - management event) interrupt that is able to wake up the host - system with a 50ms pulse on network activity -- phy-mode : See ethernet.txt file in the same directory - -Optional properties: -- reg-shift : Specify the quantity to shift the register offsets by -- reg-io-width : Specify the size (in bytes) of the IO accesses that - should be performed on the device. Valid value for SMSC LAN is - 2 or 4. If it's omitted or invalid, the size would be 2. -- smsc,irq-active-high : Indicates the IRQ polarity is active-high -- smsc,irq-push-pull : Indicates the IRQ type is push-pull -- smsc,force-internal-phy : Forces SMSC LAN controller to use - internal PHY -- smsc,force-external-phy : Forces SMSC LAN controller to use - external PHY -- smsc,save-mac-address : Indicates that mac address needs to be saved - before resetting the controller -- reset-gpios : a GPIO line connected to the RESET (active low) signal - of the device. On many systems this is wired high so the device goes - out of reset at power-on, but if it is under program control, this - optional GPIO can wake up in response to it. -- vdd33a-supply, vddvario-supply : 3.3V analog and IO logic power supplies - -Examples: - -lan9220@f4000000 { - compatible = "smsc,lan9220", "smsc,lan9115"; - reg = <0xf4000000 0x2000000>; - phy-mode = "mii"; - interrupt-parent = <&gpio1>; - interrupts = <31>, <32>; - reset-gpios = <&gpio1 30 GPIO_ACTIVE_LOW>; - reg-io-width = <4>; - smsc,irq-push-pull; -}; From 6b28a86d6c0bb02119f386ec2f56efde909e9bcb Mon Sep 17 00:00:00 2001 From: Mohammad Athari Bin Ismail Date: Wed, 30 Jun 2021 17:59:35 +0800 Subject: [PATCH 082/714] net: stmmac: Terminate FPE workqueue in suspend Add stmmac_fpe_stop_wq() in stmmac_suspend() to terminate FPE workqueue during suspend. So, in suspend mode, there will be no FPE workqueue available. Without this fix, new additional FPE workqueue will be created in every suspend->resume cycle. Fixes: 5a5586112b92 ("net: stmmac: support FPE link partner hand-shaking procedure") Signed-off-by: Mohammad Athari Bin Ismail Signed-off-by: David S. Miller --- drivers/net/ethernet/stmicro/stmmac/stmmac_main.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c b/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c index 8d9d6ecf8c63be..7b8404a21544cf 100644 --- a/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c +++ b/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c @@ -7171,6 +7171,7 @@ int stmmac_suspend(struct device *dev) priv->plat->rx_queues_to_use, false); stmmac_fpe_handshake(priv, false); + stmmac_fpe_stop_wq(priv); } priv->speed = SPEED_UNKNOWN; From 2342ae10d1272d411a468a85a67647dd115b344f Mon Sep 17 00:00:00 2001 From: Christophe JAILLET Date: Thu, 1 Jul 2021 22:18:24 +0200 Subject: [PATCH 083/714] gve: Fix an error handling path in 'gve_probe()' If the 'register_netdev() call fails, we must release the resources allocated by the previous 'gve_init_priv()' call, as already done in the remove function. Add a new label and the missing 'gve_teardown_priv_resources()' in the error handling path. Fixes: 893ce44df565 ("gve: Add basic driver framework for Compute Engine Virtual NIC") Signed-off-by: Christophe JAILLET Reviewed-by: Catherine Sullivan Signed-off-by: David S. Miller --- drivers/net/ethernet/google/gve/gve_main.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/drivers/net/ethernet/google/gve/gve_main.c b/drivers/net/ethernet/google/gve/gve_main.c index 867e87af343249..44262c9f9ec2cc 100644 --- a/drivers/net/ethernet/google/gve/gve_main.c +++ b/drivers/net/ethernet/google/gve/gve_main.c @@ -1565,7 +1565,7 @@ static int gve_probe(struct pci_dev *pdev, const struct pci_device_id *ent) err = register_netdev(dev); if (err) - goto abort_with_wq; + goto abort_with_gve_init; dev_info(&pdev->dev, "GVE version %s\n", gve_version_str); dev_info(&pdev->dev, "GVE queue format %d\n", (int)priv->queue_format); @@ -1573,6 +1573,9 @@ static int gve_probe(struct pci_dev *pdev, const struct pci_device_id *ent) queue_work(priv->gve_wq, &priv->service_task); return 0; +abort_with_gve_init: + gve_teardown_priv_resources(priv); + abort_with_wq: destroy_workqueue(priv->gve_wq); From 6dce38b4b7ffb39539b49feca2b3ce34dbaadf02 Mon Sep 17 00:00:00 2001 From: Christophe JAILLET Date: Thu, 1 Jul 2021 22:18:37 +0200 Subject: [PATCH 084/714] gve: Propagate error codes to caller If 'gve_probe()' fails, we should propagate the error code, instead of hard coding a -ENXIO value. Make sure that all error handling paths set a correct value for 'err'. Signed-off-by: Christophe JAILLET Reviewed-by: Catherine Sullivan Signed-off-by: David S. Miller --- drivers/net/ethernet/google/gve/gve_main.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/drivers/net/ethernet/google/gve/gve_main.c b/drivers/net/ethernet/google/gve/gve_main.c index 44262c9f9ec2cc..c03984b26db47a 100644 --- a/drivers/net/ethernet/google/gve/gve_main.c +++ b/drivers/net/ethernet/google/gve/gve_main.c @@ -1469,7 +1469,7 @@ static int gve_probe(struct pci_dev *pdev, const struct pci_device_id *ent) err = pci_enable_device(pdev); if (err) - return -ENXIO; + return err; err = pci_request_regions(pdev, "gvnic-cfg"); if (err) @@ -1512,6 +1512,7 @@ static int gve_probe(struct pci_dev *pdev, const struct pci_device_id *ent) dev = alloc_etherdev_mqs(sizeof(*priv), max_tx_queues, max_rx_queues); if (!dev) { dev_err(&pdev->dev, "could not allocate netdev\n"); + err = -ENOMEM; goto abort_with_db_bar; } SET_NETDEV_DEV(dev, &pdev->dev); @@ -1593,7 +1594,7 @@ static int gve_probe(struct pci_dev *pdev, const struct pci_device_id *ent) abort_with_enabled: pci_disable_device(pdev); - return -ENXIO; + return err; } static void gve_remove(struct pci_dev *pdev) From 5140aaa4604ba96685dc04b4d2dde3384bbaecef Mon Sep 17 00:00:00 2001 From: Kees Cook Date: Thu, 1 Jul 2021 17:44:07 +0200 Subject: [PATCH 085/714] s390: iucv: Avoid field over-reading memcpy() In preparation for FORTIFY_SOURCE performing compile-time and run-time field bounds checking for memcpy(), memmove(), and memset(), avoid intentionally reading across neighboring array fields. Add a wrapping struct to serve as the memcpy() source so the compiler can perform appropriate bounds checking, avoiding this future warning: In function '__fortify_memcpy', inlined from 'iucv_message_pending' at net/iucv/iucv.c:1663:4: ./include/linux/fortify-string.h:246:4: error: call to '__read_overflow2_field' declared with attribute error: detected read beyond size of field (2nd parameter) Signed-off-by: Kees Cook Signed-off-by: Karsten Graul Signed-off-by: David S. Miller --- net/iucv/iucv.c | 22 ++++++++++++---------- 1 file changed, 12 insertions(+), 10 deletions(-) diff --git a/net/iucv/iucv.c b/net/iucv/iucv.c index 349c6ac3313f7e..e6795d5a546a06 100644 --- a/net/iucv/iucv.c +++ b/net/iucv/iucv.c @@ -1635,14 +1635,16 @@ struct iucv_message_pending { u8 iptype; u32 ipmsgid; u32 iptrgcls; - union { - u32 iprmmsg1_u32; - u8 iprmmsg1[4]; - } ln1msg1; - union { - u32 ipbfln1f; - u8 iprmmsg2[4]; - } ln1msg2; + struct { + union { + u32 iprmmsg1_u32; + u8 iprmmsg1[4]; + } ln1msg1; + union { + u32 ipbfln1f; + u8 iprmmsg2[4]; + } ln1msg2; + } rmmsg; u32 res1[3]; u32 ipbfln2f; u8 ippollfg; @@ -1660,10 +1662,10 @@ static void iucv_message_pending(struct iucv_irq_data *data) msg.id = imp->ipmsgid; msg.class = imp->iptrgcls; if (imp->ipflags1 & IUCV_IPRMDATA) { - memcpy(msg.rmmsg, imp->ln1msg1.iprmmsg1, 8); + memcpy(msg.rmmsg, &imp->rmmsg, 8); msg.length = 8; } else - msg.length = imp->ln1msg2.ipbfln1f; + msg.length = imp->rmmsg.ln1msg2.ipbfln1f; msg.reply_size = imp->ipbfln2f; path->handler->message_pending(path, &msg); } From 37d220b58d52d6da88b928e11bffd83f88c6ac79 Mon Sep 17 00:00:00 2001 From: Florian Westphal Date: Thu, 24 Jun 2021 12:36:41 +0200 Subject: [PATCH 086/714] selftest: netfilter: add test case for unreplied tcp connections TCP connections in UNREPLIED state (only SYN seen) can be kept alive indefinitely, as each SYN re-sets the timeout. This means that even if a peer has closed its socket the entry never times out. This also prevents re-evaluation of configured NAT rules. Add a test case that sets SYN timeout to 10 seconds, then check that the nat redirection added later eventually takes effect. This is based off a repro script from Antonio Ojea. Signed-off-by: Florian Westphal Signed-off-by: Pablo Neira Ayuso --- tools/testing/selftests/netfilter/Makefile | 2 +- .../netfilter/conntrack_tcp_unreplied.sh | 167 ++++++++++++++++++ 2 files changed, 168 insertions(+), 1 deletion(-) create mode 100755 tools/testing/selftests/netfilter/conntrack_tcp_unreplied.sh diff --git a/tools/testing/selftests/netfilter/Makefile b/tools/testing/selftests/netfilter/Makefile index cd6430b3998200..8748199ac10984 100644 --- a/tools/testing/selftests/netfilter/Makefile +++ b/tools/testing/selftests/netfilter/Makefile @@ -5,7 +5,7 @@ TEST_PROGS := nft_trans_stress.sh nft_fib.sh nft_nat.sh bridge_brouter.sh \ conntrack_icmp_related.sh nft_flowtable.sh ipvs.sh \ nft_concat_range.sh nft_conntrack_helper.sh \ nft_queue.sh nft_meta.sh nf_nat_edemux.sh \ - ipip-conntrack-mtu.sh + ipip-conntrack-mtu.sh conntrack_tcp_unreplied.sh LDLIBS = -lmnl TEST_GEN_FILES = nf-queue diff --git a/tools/testing/selftests/netfilter/conntrack_tcp_unreplied.sh b/tools/testing/selftests/netfilter/conntrack_tcp_unreplied.sh new file mode 100755 index 00000000000000..e7d7bf13cff539 --- /dev/null +++ b/tools/testing/selftests/netfilter/conntrack_tcp_unreplied.sh @@ -0,0 +1,167 @@ +#!/bin/bash +# SPDX-License-Identifier: GPL-2.0 +# +# Check that UNREPLIED tcp conntrack will eventually timeout. +# + +# Kselftest framework requirement - SKIP code is 4. +ksft_skip=4 +ret=0 + +waittime=20 +sfx=$(mktemp -u "XXXXXXXX") +ns1="ns1-$sfx" +ns2="ns2-$sfx" + +nft --version > /dev/null 2>&1 +if [ $? -ne 0 ];then + echo "SKIP: Could not run test without nft tool" + exit $ksft_skip +fi + +ip -Version > /dev/null 2>&1 +if [ $? -ne 0 ];then + echo "SKIP: Could not run test without ip tool" + exit $ksft_skip +fi + +cleanup() { + ip netns pids $ns1 | xargs kill 2>/dev/null + ip netns pids $ns2 | xargs kill 2>/dev/null + + ip netns del $ns1 + ip netns del $ns2 +} + +ipv4() { + echo -n 192.168.$1.2 +} + +check_counter() +{ + ns=$1 + name=$2 + expect=$3 + local lret=0 + + cnt=$(ip netns exec $ns2 nft list counter inet filter "$name" | grep -q "$expect") + if [ $? -ne 0 ]; then + echo "ERROR: counter $name in $ns2 has unexpected value (expected $expect)" 1>&2 + ip netns exec $ns2 nft list counter inet filter "$name" 1>&2 + lret=1 + fi + + return $lret +} + +# Create test namespaces +ip netns add $ns1 || exit 1 + +trap cleanup EXIT + +ip netns add $ns2 || exit 1 + +# Connect the namespace to the host using a veth pair +ip -net $ns1 link add name veth1 type veth peer name veth2 +ip -net $ns1 link set netns $ns2 dev veth2 + +ip -net $ns1 link set up dev lo +ip -net $ns2 link set up dev lo +ip -net $ns1 link set up dev veth1 +ip -net $ns2 link set up dev veth2 + +ip -net $ns2 addr add 10.11.11.2/24 dev veth2 +ip -net $ns2 route add default via 10.11.11.1 + +ip netns exec $ns2 sysctl -q net.ipv4.conf.veth2.forwarding=1 + +# add a rule inside NS so we enable conntrack +ip netns exec $ns1 iptables -A INPUT -m state --state established,related -j ACCEPT + +ip -net $ns1 addr add 10.11.11.1/24 dev veth1 +ip -net $ns1 route add 10.99.99.99 via 10.11.11.2 + +# Check connectivity works +ip netns exec $ns1 ping -q -c 2 10.11.11.2 >/dev/null || exit 1 + +ip netns exec $ns2 nc -l -p 8080 < /dev/null & + +# however, conntrack entries are there + +ip netns exec $ns2 nft -f - < $ns2 to the virtual ip" +ip netns exec $ns1 bash -c 'while true ; do + nc -p 60000 10.99.99.99 80 + sleep 1 + done' & + +sleep 1 + +ip netns exec $ns2 nft -f - </dev/null | wc -l) +if [ $count -eq 0 ]; then + echo "ERROR: $ns2 did not pick up tcp connection from peer" + exit 1 +fi + +echo "INFO: NAT redirect added in ns $ns2, waiting for $waittime seconds for nat to take effect" +for i in $(seq 1 $waittime); do + echo -n "." + + sleep 1 + + count=$(ip netns exec $ns2 conntrack -L -p tcp --reply-port-src 8080 2>/dev/null | wc -l) + if [ $count -gt 0 ]; then + echo + echo "PASS: redirection took effect after $i seconds" + break + fi + + m=$((i%20)) + if [ $m -eq 0 ]; then + echo " waited for $i seconds" + fi +done + +expect="packets 1 bytes 60" +check_counter "$ns2" "redir" "$expect" +if [ $? -ne 0 ]; then + ret=1 +fi + +if [ $ret -eq 0 ];then + echo "PASS: redirection counter has expected values" +else + echo "ERROR: no tcp connection was redirected" +fi + +exit $ret From e15d4cdf27cb0c1e977270270b2cea12e0955edd Mon Sep 17 00:00:00 2001 From: Florian Westphal Date: Thu, 24 Jun 2021 12:36:42 +0200 Subject: [PATCH 087/714] netfilter: conntrack: do not renew entry stuck in tcp SYN_SENT state Consider: client -----> conntrack ---> Host client sends a SYN, but $Host is unreachable/silent. Client eventually gives up and the conntrack entry will time out. However, if the client is restarted with same addr/port pair, it may prevent the conntrack entry from timing out. This is noticeable when the existing conntrack entry has no NAT transformation or an outdated one and port reuse happens either on client or due to a NAT middlebox. This change prevents refresh of the timeout for SYN retransmits, so entry is going away after nf_conntrack_tcp_timeout_syn_sent seconds (default: 60). Entry will be re-created on next connection attempt, but then nat rules will be evaluated again. Signed-off-by: Florian Westphal Signed-off-by: Pablo Neira Ayuso --- net/netfilter/nf_conntrack_proto_tcp.c | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/net/netfilter/nf_conntrack_proto_tcp.c b/net/netfilter/nf_conntrack_proto_tcp.c index f7e8baf59b51fc..eb4de92077a8b5 100644 --- a/net/netfilter/nf_conntrack_proto_tcp.c +++ b/net/netfilter/nf_conntrack_proto_tcp.c @@ -1134,6 +1134,16 @@ int nf_conntrack_tcp_packet(struct nf_conn *ct, nf_ct_kill_acct(ct, ctinfo, skb); return NF_ACCEPT; } + + if (index == TCP_SYN_SET && old_state == TCP_CONNTRACK_SYN_SENT) { + /* do not renew timeout on SYN retransmit. + * + * Else port reuse by client or NAT middlebox can keep + * entry alive indefinitely (including nat info). + */ + return NF_ACCEPT; + } + /* ESTABLISHED without SEEN_REPLY, i.e. mid-connection * pickup with loose=1. Avoid large ESTABLISHED timeout. */ From 4ca041f919f13783b0b03894783deee00dbca19a Mon Sep 17 00:00:00 2001 From: Colin Ian King Date: Thu, 24 Jun 2021 20:57:18 +0100 Subject: [PATCH 088/714] netfilter: nf_tables: Fix dereference of null pointer flow In the case where chain->flags & NFT_CHAIN_HW_OFFLOAD is false then nft_flow_rule_create is not called and flow is NULL. The subsequent error handling execution via label err_destroy_flow_rule will lead to a null pointer dereference on flow when calling nft_flow_rule_destroy. Since the error path to err_destroy_flow_rule has to cater for null and non-null flows, only call nft_flow_rule_destroy if flow is non-null to fix this issue. Addresses-Coverity: ("Explicity null dereference") Fixes: 3c5e44622011 ("netfilter: nf_tables: memleak in hw offload abort path") Signed-off-by: Colin Ian King Signed-off-by: Pablo Neira Ayuso --- net/netfilter/nf_tables_api.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/net/netfilter/nf_tables_api.c b/net/netfilter/nf_tables_api.c index 390d4466567f71..de182d1f7c4eda 100644 --- a/net/netfilter/nf_tables_api.c +++ b/net/netfilter/nf_tables_api.c @@ -3446,7 +3446,8 @@ static int nf_tables_newrule(struct sk_buff *skb, const struct nfnl_info *info, return 0; err_destroy_flow_rule: - nft_flow_rule_destroy(flow); + if (flow) + nft_flow_rule_destroy(flow); err_release_rule: nf_tables_rule_release(&ctx, rule); err_release_expr: From a23f89a9990684a0ca0cac4a2857c15d338ebe2d Mon Sep 17 00:00:00 2001 From: Vasily Averin Date: Thu, 1 Jul 2021 08:02:24 +0300 Subject: [PATCH 089/714] netfilter: conntrack: nf_ct_gre_keymap_flush() removal nf_ct_gre_keymap_flush() is useless. It is called from nf_conntrack_cleanup_net_list() only and tries to remove nf_ct_gre_keymap entries from pernet gre keymap list. Though: a) at this point the list should already be empty, all its entries were deleted during the conntracks cleanup, because nf_conntrack_cleanup_net_list() executes nf_ct_iterate_cleanup(kill_all) before nf_conntrack_proto_pernet_fini(): nf_conntrack_cleanup_net_list +- nf_ct_iterate_cleanup | nf_ct_put | nf_conntrack_put | nf_conntrack_destroy | destroy_conntrack | destroy_gre_conntrack | nf_ct_gre_keymap_destroy `- nf_conntrack_proto_pernet_fini nf_ct_gre_keymap_flush b) Let's say we find that the keymap list is not empty. This means netns still has a conntrack associated with gre, in which case we should not free its memory, because this will lead to a double free and related crashes. However I doubt it could have gone unnoticed for years, obviously this does not happen in real life. So I think we can remove both nf_ct_gre_keymap_flush() and nf_conntrack_proto_pernet_fini(). Signed-off-by: Vasily Averin Acked-by: Florian Westphal Signed-off-by: Pablo Neira Ayuso --- include/net/netfilter/nf_conntrack_core.h | 1 - net/netfilter/nf_conntrack_core.c | 1 - net/netfilter/nf_conntrack_proto.c | 7 ------- net/netfilter/nf_conntrack_proto_gre.c | 13 ------------- 4 files changed, 22 deletions(-) diff --git a/include/net/netfilter/nf_conntrack_core.h b/include/net/netfilter/nf_conntrack_core.h index 09f2efea0b9703..13807ea94cd2b4 100644 --- a/include/net/netfilter/nf_conntrack_core.h +++ b/include/net/netfilter/nf_conntrack_core.h @@ -30,7 +30,6 @@ void nf_conntrack_cleanup_net(struct net *net); void nf_conntrack_cleanup_net_list(struct list_head *net_exit_list); void nf_conntrack_proto_pernet_init(struct net *net); -void nf_conntrack_proto_pernet_fini(struct net *net); int nf_conntrack_proto_init(void); void nf_conntrack_proto_fini(void); diff --git a/net/netfilter/nf_conntrack_core.c b/net/netfilter/nf_conntrack_core.c index 96ba19fc8155d2..085a11f1eb43fd 100644 --- a/net/netfilter/nf_conntrack_core.c +++ b/net/netfilter/nf_conntrack_core.c @@ -2457,7 +2457,6 @@ void nf_conntrack_cleanup_net_list(struct list_head *net_exit_list) } list_for_each_entry(net, net_exit_list, exit_list) { - nf_conntrack_proto_pernet_fini(net); nf_conntrack_ecache_pernet_fini(net); nf_conntrack_expect_pernet_fini(net); free_percpu(net->ct.stat); diff --git a/net/netfilter/nf_conntrack_proto.c b/net/netfilter/nf_conntrack_proto.c index 55647409a9be9f..8f7a9837349c18 100644 --- a/net/netfilter/nf_conntrack_proto.c +++ b/net/netfilter/nf_conntrack_proto.c @@ -697,13 +697,6 @@ void nf_conntrack_proto_pernet_init(struct net *net) #endif } -void nf_conntrack_proto_pernet_fini(struct net *net) -{ -#ifdef CONFIG_NF_CT_PROTO_GRE - nf_ct_gre_keymap_flush(net); -#endif -} - module_param_call(hashsize, nf_conntrack_set_hashsize, param_get_uint, &nf_conntrack_htable_size, 0600); diff --git a/net/netfilter/nf_conntrack_proto_gre.c b/net/netfilter/nf_conntrack_proto_gre.c index db11e403d81874..728eeb0aea8714 100644 --- a/net/netfilter/nf_conntrack_proto_gre.c +++ b/net/netfilter/nf_conntrack_proto_gre.c @@ -55,19 +55,6 @@ static inline struct nf_gre_net *gre_pernet(struct net *net) return &net->ct.nf_ct_proto.gre; } -void nf_ct_gre_keymap_flush(struct net *net) -{ - struct nf_gre_net *net_gre = gre_pernet(net); - struct nf_ct_gre_keymap *km, *tmp; - - spin_lock_bh(&keymap_lock); - list_for_each_entry_safe(km, tmp, &net_gre->keymap_list, list) { - list_del_rcu(&km->list); - kfree_rcu(km, rcu); - } - spin_unlock_bh(&keymap_lock); -} - static inline int gre_key_cmpfn(const struct nf_ct_gre_keymap *km, const struct nf_conntrack_tuple *t) { From c23a9fd209bc6f8c1fa6ee303fdf037d784a1627 Mon Sep 17 00:00:00 2001 From: Vasily Averin Date: Thu, 1 Jul 2021 08:02:49 +0300 Subject: [PATCH 090/714] netfilter: ctnetlink: suspicious RCU usage in ctnetlink_dump_helpinfo Two patches listed below removed ctnetlink_dump_helpinfo call from under rcu_read_lock. Now its rcu_dereference generates following warning: ============================= WARNING: suspicious RCU usage 5.13.0+ #5 Not tainted ----------------------------- net/netfilter/nf_conntrack_netlink.c:221 suspicious rcu_dereference_check() usage! other info that might help us debug this: rcu_scheduler_active = 2, debug_locks = 1 stack backtrace: CPU: 1 PID: 2251 Comm: conntrack Not tainted 5.13.0+ #5 Call Trace: dump_stack+0x7f/0xa1 ctnetlink_dump_helpinfo+0x134/0x150 [nf_conntrack_netlink] ctnetlink_fill_info+0x2c2/0x390 [nf_conntrack_netlink] ctnetlink_dump_table+0x13f/0x370 [nf_conntrack_netlink] netlink_dump+0x10c/0x370 __netlink_dump_start+0x1a7/0x260 ctnetlink_get_conntrack+0x1e5/0x250 [nf_conntrack_netlink] nfnetlink_rcv_msg+0x613/0x993 [nfnetlink] netlink_rcv_skb+0x50/0x100 nfnetlink_rcv+0x55/0x120 [nfnetlink] netlink_unicast+0x181/0x260 netlink_sendmsg+0x23f/0x460 sock_sendmsg+0x5b/0x60 __sys_sendto+0xf1/0x160 __x64_sys_sendto+0x24/0x30 do_syscall_64+0x36/0x70 entry_SYSCALL_64_after_hwframe+0x44/0xae Fixes: 49ca022bccc5 ("netfilter: ctnetlink: don't dump ct extensions of unconfirmed conntracks") Fixes: 0b35f6031a00 ("netfilter: Remove duplicated rcu_read_lock.") Signed-off-by: Vasily Averin Reviewed-by: Florian Westphal Signed-off-by: Pablo Neira Ayuso --- net/netfilter/nf_conntrack_netlink.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/net/netfilter/nf_conntrack_netlink.c b/net/netfilter/nf_conntrack_netlink.c index 4e1a9dba707730..e81af33b233b18 100644 --- a/net/netfilter/nf_conntrack_netlink.c +++ b/net/netfilter/nf_conntrack_netlink.c @@ -218,6 +218,7 @@ static int ctnetlink_dump_helpinfo(struct sk_buff *skb, if (!help) return 0; + rcu_read_lock(); helper = rcu_dereference(help->helper); if (!helper) goto out; @@ -233,9 +234,11 @@ static int ctnetlink_dump_helpinfo(struct sk_buff *skb, nla_nest_end(skb, nest_helper); out: + rcu_read_unlock(); return 0; nla_put_failure: + rcu_read_unlock(); return -1; } From 633fa666401c42f9a106a509b7702c58c84524e2 Mon Sep 17 00:00:00 2001 From: Jesper Dangaard Brouer Date: Thu, 1 Jul 2021 15:16:53 +0200 Subject: [PATCH 091/714] net/sched: sch_taprio: fix typo in comment I have checked that the IEEE standard 802.1Q-2018 section 8.6.9.4.5 is called AdminGateStates. Signed-off-by: Jesper Dangaard Brouer Signed-off-by: David S. Miller --- net/sched/sch_taprio.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/sched/sch_taprio.c b/net/sched/sch_taprio.c index 66fe2b82af9aa6..07b30d0601d7e3 100644 --- a/net/sched/sch_taprio.c +++ b/net/sched/sch_taprio.c @@ -564,7 +564,7 @@ static struct sk_buff *taprio_dequeue_soft(struct Qdisc *sch) /* if there's no entry, it means that the schedule didn't * start yet, so force all gates to be open, this is in * accordance to IEEE 802.1Qbv-2015 Section 8.6.9.4.5 - * "AdminGateSates" + * "AdminGateStates" */ gate_mask = entry ? entry->gate_mask : TAPRIO_ALL_GATES_OPEN; From bde3c8ffdd4153a3e9f0b0d51d972b30113b35ac Mon Sep 17 00:00:00 2001 From: Christophe JAILLET Date: Thu, 1 Jul 2021 22:41:19 +0200 Subject: [PATCH 092/714] gve: Simplify code and axe the use of a deprecated API The wrappers in include/linux/pci-dma-compat.h should go away. Replace 'pci_set_dma_mask/pci_set_consistent_dma_mask' by an equivalent and less verbose 'dma_set_mask_and_coherent()' call. Signed-off-by: Christophe JAILLET Reviewed-by: Catherine Sullivan Signed-off-by: David S. Miller --- drivers/net/ethernet/google/gve/gve_main.c | 9 +-------- 1 file changed, 1 insertion(+), 8 deletions(-) diff --git a/drivers/net/ethernet/google/gve/gve_main.c b/drivers/net/ethernet/google/gve/gve_main.c index c03984b26db47a..099a2bc5ae6704 100644 --- a/drivers/net/ethernet/google/gve/gve_main.c +++ b/drivers/net/ethernet/google/gve/gve_main.c @@ -1477,19 +1477,12 @@ static int gve_probe(struct pci_dev *pdev, const struct pci_device_id *ent) pci_set_master(pdev); - err = pci_set_dma_mask(pdev, DMA_BIT_MASK(64)); + err = dma_set_mask_and_coherent(&pdev->dev, DMA_BIT_MASK(64)); if (err) { dev_err(&pdev->dev, "Failed to set dma mask: err=%d\n", err); goto abort_with_pci_region; } - err = pci_set_consistent_dma_mask(pdev, DMA_BIT_MASK(64)); - if (err) { - dev_err(&pdev->dev, - "Failed to set consistent dma mask: err=%d\n", err); - goto abort_with_pci_region; - } - reg_bar = pci_iomap(pdev, GVE_REGISTER_BAR, 0); if (!reg_bar) { dev_err(&pdev->dev, "Failed to map pci bar!\n"); From 40fc3054b45820c28ea3c65e2c86d041dc244a8a Mon Sep 17 00:00:00 2001 From: Vadim Fedorenko Date: Fri, 2 Jul 2021 02:47:00 +0300 Subject: [PATCH 093/714] net: ipv6: fix return value of ip6_skb_dst_mtu Commit 628a5c561890 ("[INET]: Add IP(V6)_PMTUDISC_RPOBE") introduced ip6_skb_dst_mtu with return value of signed int which is inconsistent with actually returned values. Also 2 users of this function actually assign its value to unsigned int variable and only __xfrm6_output assigns result of this function to signed variable but actually uses as unsigned in further comparisons and calls. Change this function to return unsigned int value. Fixes: 628a5c561890 ("[INET]: Add IP(V6)_PMTUDISC_RPOBE") Reviewed-by: David Ahern Signed-off-by: Vadim Fedorenko Signed-off-by: David S. Miller --- include/net/ip6_route.h | 2 +- net/ipv6/xfrm6_output.c | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/include/net/ip6_route.h b/include/net/ip6_route.h index f14149df5a654d..625a38ccb5d945 100644 --- a/include/net/ip6_route.h +++ b/include/net/ip6_route.h @@ -263,7 +263,7 @@ static inline bool ipv6_anycast_destination(const struct dst_entry *dst, int ip6_fragment(struct net *net, struct sock *sk, struct sk_buff *skb, int (*output)(struct net *, struct sock *, struct sk_buff *)); -static inline int ip6_skb_dst_mtu(struct sk_buff *skb) +static inline unsigned int ip6_skb_dst_mtu(struct sk_buff *skb) { int mtu; diff --git a/net/ipv6/xfrm6_output.c b/net/ipv6/xfrm6_output.c index 57fa27c1cdf96f..d0d280077721b8 100644 --- a/net/ipv6/xfrm6_output.c +++ b/net/ipv6/xfrm6_output.c @@ -49,7 +49,7 @@ static int __xfrm6_output(struct net *net, struct sock *sk, struct sk_buff *skb) { struct dst_entry *dst = skb_dst(skb); struct xfrm_state *x = dst->xfrm; - int mtu; + unsigned int mtu; bool toobig; #ifdef CONFIG_NETFILTER From 1bfa4d0cb5adf954e0f4870a3ecb7cb19506320c Mon Sep 17 00:00:00 2001 From: Bailey Forrest Date: Thu, 1 Jul 2021 20:13:36 -0700 Subject: [PATCH 094/714] gve: DQO: Remove incorrect prefetch The prefetch is incorrectly using the dma address instead of the virtual address. It's supposed to be: prefetch((char *)buf_state->page_info.page_address + buf_state->page_info.page_offset) However, after correcting this mistake, there is no evidence of performance improvement. Fixes: 9b8dd5e5ea48 ("gve: DQO: Add RX path") Signed-off-by: Bailey Forrest Signed-off-by: David S. Miller --- drivers/net/ethernet/google/gve/gve_rx_dqo.c | 7 ------- 1 file changed, 7 deletions(-) diff --git a/drivers/net/ethernet/google/gve/gve_rx_dqo.c b/drivers/net/ethernet/google/gve/gve_rx_dqo.c index 77bb8227f89b5c..8500621b2cd412 100644 --- a/drivers/net/ethernet/google/gve/gve_rx_dqo.c +++ b/drivers/net/ethernet/google/gve/gve_rx_dqo.c @@ -566,13 +566,6 @@ static int gve_rx_dqo(struct napi_struct *napi, struct gve_rx_ring *rx, return 0; } - /* Prefetch the payload header. */ - prefetch((char *)buf_state->addr + buf_state->page_info.page_offset); -#if L1_CACHE_BYTES < 128 - prefetch((char *)buf_state->addr + buf_state->page_info.page_offset + - L1_CACHE_BYTES); -#endif - if (eop && buf_len <= priv->rx_copybreak) { rx->skb_head = gve_rx_copy(priv->dev, napi, &buf_state->page_info, buf_len, 0); From 8955b90c3cdad199137809aac8ccbbb585355913 Mon Sep 17 00:00:00 2001 From: wenxu Date: Fri, 2 Jul 2021 11:34:31 +0800 Subject: [PATCH 095/714] net/sched: act_ct: fix err check for nf_conntrack_confirm The confirm operation should be checked. If there are any failed, the packet should be dropped like in ovs and netfilter. Fixes: b57dc7c13ea9 ("net/sched: Introduce action ct") Signed-off-by: wenxu Signed-off-by: David S. Miller --- net/sched/act_ct.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/net/sched/act_ct.c b/net/sched/act_ct.c index a656baa321fe16..a62f404d0e1657 100644 --- a/net/sched/act_ct.c +++ b/net/sched/act_ct.c @@ -1026,7 +1026,8 @@ static int tcf_ct_act(struct sk_buff *skb, const struct tc_action *a, /* This will take care of sending queued events * even if the connection is already confirmed. */ - nf_conntrack_confirm(skb); + if (nf_conntrack_confirm(skb) != NF_ACCEPT) + goto drop; } if (!skip_add) From 561022acb1ce62e50f7a8258687a21b84282a4cb Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Fri, 2 Jul 2021 13:09:03 -0700 Subject: [PATCH 096/714] tcp: annotate data races around tp->mtu_info While tp->mtu_info is read while socket is owned, the write sides happen from err handlers (tcp_v[46]_mtu_reduced) which only own the socket spinlock. Fixes: 563d34d05786 ("tcp: dont drop MTU reduction indications") Signed-off-by: Eric Dumazet Signed-off-by: David S. Miller --- net/ipv4/tcp_ipv4.c | 4 ++-- net/ipv6/tcp_ipv6.c | 4 ++-- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c index e66ad6bfe8083b..b9dc2d6197be8b 100644 --- a/net/ipv4/tcp_ipv4.c +++ b/net/ipv4/tcp_ipv4.c @@ -342,7 +342,7 @@ void tcp_v4_mtu_reduced(struct sock *sk) if ((1 << sk->sk_state) & (TCPF_LISTEN | TCPF_CLOSE)) return; - mtu = tcp_sk(sk)->mtu_info; + mtu = READ_ONCE(tcp_sk(sk)->mtu_info); dst = inet_csk_update_pmtu(sk, mtu); if (!dst) return; @@ -546,7 +546,7 @@ int tcp_v4_err(struct sk_buff *skb, u32 info) if (sk->sk_state == TCP_LISTEN) goto out; - tp->mtu_info = info; + WRITE_ONCE(tp->mtu_info, info); if (!sock_owned_by_user(sk)) { tcp_v4_mtu_reduced(sk); } else { diff --git a/net/ipv6/tcp_ipv6.c b/net/ipv6/tcp_ipv6.c index 578ab6305c3f84..593c32fe57ed13 100644 --- a/net/ipv6/tcp_ipv6.c +++ b/net/ipv6/tcp_ipv6.c @@ -352,7 +352,7 @@ static void tcp_v6_mtu_reduced(struct sock *sk) if ((1 << sk->sk_state) & (TCPF_LISTEN | TCPF_CLOSE)) return; - dst = inet6_csk_update_pmtu(sk, tcp_sk(sk)->mtu_info); + dst = inet6_csk_update_pmtu(sk, READ_ONCE(tcp_sk(sk)->mtu_info)); if (!dst) return; @@ -443,7 +443,7 @@ static int tcp_v6_err(struct sk_buff *skb, struct inet6_skb_parm *opt, if (!ip6_sk_accept_pmtu(sk)) goto out; - tp->mtu_info = ntohl(info); + WRITE_ONCE(tp->mtu_info, ntohl(info)); if (!sock_owned_by_user(sk)) tcp_v6_mtu_reduced(sk); else if (!test_and_set_bit(TCP_MTU_REDUCED_DEFERRED, From 81b4a0cc7565b08cadd0d02bae3434f127d1d72a Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Fri, 2 Jul 2021 07:41:01 -0700 Subject: [PATCH 097/714] sock: fix error in sock_setsockopt() Some tests are failing, John bisected the issue to a recent commit. sock_set_timestamp() parameters should be : 1) sk 2) optname 3) valbool Fixes: 371087aa476a ("sock: expose so_timestamp options for mptcp") Signed-off-by: Eric Dumazet Bisected-by: John Sperbeck Cc: Paolo Abeni Cc: Florian Westphal Cc: Mat Martineau Reviewed-by: Florian Westphal Signed-off-by: David S. Miller --- net/core/sock.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/core/sock.c b/net/core/sock.c index cad10711220431..1c4b0468bc2c30 100644 --- a/net/core/sock.c +++ b/net/core/sock.c @@ -1116,7 +1116,7 @@ int sock_setsockopt(struct socket *sock, int level, int optname, case SO_TIMESTAMP_NEW: case SO_TIMESTAMPNS_OLD: case SO_TIMESTAMPNS_NEW: - sock_set_timestamp(sk, valbool, optname); + sock_set_timestamp(sk, optname, valbool); break; case SO_TIMESTAMPING_NEW: From a019abd8022061b917da767cd1a66ed823724eab Mon Sep 17 00:00:00 2001 From: Wolfgang Bumiller Date: Fri, 2 Jul 2021 14:07:36 +0200 Subject: [PATCH 098/714] net: bridge: sync fdb to new unicast-filtering ports Since commit 2796d0c648c9 ("bridge: Automatically manage port promiscuous mode.") bridges with `vlan_filtering 1` and only 1 auto-port don't set IFF_PROMISC for unicast-filtering-capable ports. Normally on port changes `br_manage_promisc` is called to update the promisc flags and unicast filters if necessary, but it cannot distinguish between *new* ports and ones losing their promisc flag, and new ports end up not receiving the MAC address list. Fix this by calling `br_fdb_sync_static` in `br_add_if` after the port promisc flags are updated and the unicast filter was supposed to have been filled. Fixes: 2796d0c648c9 ("bridge: Automatically manage port promiscuous mode.") Signed-off-by: Wolfgang Bumiller Acked-by: Nikolay Aleksandrov Signed-off-by: David S. Miller --- net/bridge/br_if.c | 17 ++++++++++++++++- 1 file changed, 16 insertions(+), 1 deletion(-) diff --git a/net/bridge/br_if.c b/net/bridge/br_if.c index f7d2f472ae24f1..6e4a32354a138f 100644 --- a/net/bridge/br_if.c +++ b/net/bridge/br_if.c @@ -562,7 +562,7 @@ int br_add_if(struct net_bridge *br, struct net_device *dev, struct net_bridge_port *p; int err = 0; unsigned br_hr, dev_hr; - bool changed_addr; + bool changed_addr, fdb_synced = false; /* Don't allow bridging non-ethernet like devices. */ if ((dev->flags & IFF_LOOPBACK) || @@ -652,6 +652,19 @@ int br_add_if(struct net_bridge *br, struct net_device *dev, list_add_rcu(&p->list, &br->port_list); nbp_update_port_count(br); + if (!br_promisc_port(p) && (p->dev->priv_flags & IFF_UNICAST_FLT)) { + /* When updating the port count we also update all ports' + * promiscuous mode. + * A port leaving promiscuous mode normally gets the bridge's + * fdb synced to the unicast filter (if supported), however, + * `br_port_clear_promisc` does not distinguish between + * non-promiscuous ports and *new* ports, so we need to + * sync explicitly here. + */ + fdb_synced = br_fdb_sync_static(br, p) == 0; + if (!fdb_synced) + netdev_err(dev, "failed to sync bridge static fdb addresses to this port\n"); + } netdev_update_features(br->dev); @@ -701,6 +714,8 @@ int br_add_if(struct net_bridge *br, struct net_device *dev, return 0; err7: + if (fdb_synced) + br_fdb_unsync_static(br, p); list_del_rcu(&p->list); br_fdb_delete_by_port(br, p, 0, 1); nbp_update_port_count(br); From 77ac5e40c44eb78333fbc38482d61fc2af7dda0a Mon Sep 17 00:00:00 2001 From: Louis Peens Date: Fri, 2 Jul 2021 11:21:38 +0200 Subject: [PATCH 099/714] net/sched: act_ct: remove and free nf_table callbacks When cleaning up the nf_table in tcf_ct_flow_table_cleanup_work there is no guarantee that the callback list, added to by nf_flow_table_offload_add_cb, is empty. This means that it is possible that the flow_block_cb memory allocated will be lost. Fix this by iterating the list and freeing the flow_block_cb entries before freeing the nf_table entry (via freeing ct_ft). Fixes: 978703f42549 ("netfilter: flowtable: Add API for registering to flow table events") Signed-off-by: Louis Peens Signed-off-by: Yinjun Zhang Signed-off-by: Simon Horman Signed-off-by: David S. Miller --- net/sched/act_ct.c | 11 +++++++++++ 1 file changed, 11 insertions(+) diff --git a/net/sched/act_ct.c b/net/sched/act_ct.c index a62f404d0e1657..1b4b3514c94f2a 100644 --- a/net/sched/act_ct.c +++ b/net/sched/act_ct.c @@ -322,11 +322,22 @@ static int tcf_ct_flow_table_get(struct tcf_ct_params *params) static void tcf_ct_flow_table_cleanup_work(struct work_struct *work) { + struct flow_block_cb *block_cb, *tmp_cb; struct tcf_ct_flow_table *ct_ft; + struct flow_block *block; ct_ft = container_of(to_rcu_work(work), struct tcf_ct_flow_table, rwork); nf_flow_table_free(&ct_ft->nf_ft); + + /* Remove any remaining callbacks before cleanup */ + block = &ct_ft->nf_ft.flow_block; + down_write(&ct_ft->nf_ft.flow_block_lock); + list_for_each_entry_safe(block_cb, tmp_cb, &block->cb_list, list) { + list_del(&block_cb->list); + flow_block_cb_free(block_cb); + } + up_write(&ct_ft->nf_ft.flow_block_lock); kfree(ct_ft); module_put(THIS_MODULE); From 7cc93d888df764a13f196e3d4aef38869f7dd217 Mon Sep 17 00:00:00 2001 From: Louis Peens Date: Fri, 2 Jul 2021 11:21:39 +0200 Subject: [PATCH 100/714] nfp: flower-ct: remove callback delete deadlock The current location of the callback delete can lead to a race condition where deleting the callback requires a write_lock on the nf_table, but at the same time another thread from netfilter could have taken a read lock on the table before trying to offload. Since the driver is taking a rtnl_lock this can lead into a deadlock situation, where the netfilter offload will wait for the cls_flower rtnl_lock to be released, but this cannot happen since this is waiting for the nf_table read_lock to be released before it can delete the callback. Solve this by completely removing the nf_flow_table_offload_del_cb call, as this will now be cleaned up by act_ct itself when cleaning up the specific nf_table. Fixes: 62268e78145f ("nfp: flower-ct: add nft callback stubs") Signed-off-by: Louis Peens Signed-off-by: Yinjun Zhang Signed-off-by: Simon Horman Signed-off-by: David S. Miller --- .../net/ethernet/netronome/nfp/flower/conntrack.c | 13 ------------- 1 file changed, 13 deletions(-) diff --git a/drivers/net/ethernet/netronome/nfp/flower/conntrack.c b/drivers/net/ethernet/netronome/nfp/flower/conntrack.c index 273d529d43c207..128020b1573e64 100644 --- a/drivers/net/ethernet/netronome/nfp/flower/conntrack.c +++ b/drivers/net/ethernet/netronome/nfp/flower/conntrack.c @@ -1141,20 +1141,7 @@ int nfp_fl_ct_del_flow(struct nfp_fl_ct_map_entry *ct_map_ent) nfp_fl_ct_clean_flow_entry(ct_entry); kfree(ct_map_ent); - /* If this is the last pre_ct_rule it means that it is - * very likely that the nft table will be cleaned up next, - * as this happens on the removal of the last act_ct flow. - * However we cannot deregister the callback on the removal - * of the last nft flow as this runs into a deadlock situation. - * So deregister the callback on removal of the last pre_ct flow - * and remove any remaining nft flow entries. We also cannot - * save this state and delete the callback later since the - * nft table would already have been freed at that time. - */ if (!zt->pre_ct_count) { - nf_flow_table_offload_del_cb(zt->nft, - nfp_fl_ct_handle_nft_flow, - zt); zt->nft = NULL; nfp_fl_ct_clean_nft_entries(zt); } From b22580233d473dbf7bbfa4f6549c09e2c80e9e64 Mon Sep 17 00:00:00 2001 From: Ronak Doshi Date: Thu, 1 Jul 2021 23:44:27 -0700 Subject: [PATCH 101/714] vmxnet3: fix cksum offload issues for tunnels with non-default udp ports Commit dacce2be3312 ("vmxnet3: add geneve and vxlan tunnel offload support") added support for encapsulation offload. However, the inner offload capability is to be restricted to UDP tunnels with default Vxlan and Geneve ports. This patch fixes the issue for tunnels with non-default ports using features check capability and filtering appropriate features for such tunnels. Fixes: dacce2be3312 ("vmxnet3: add geneve and vxlan tunnel offload support") Signed-off-by: Ronak Doshi Acked-by: Guolin Yang Signed-off-by: David S. Miller --- drivers/net/vmxnet3/vmxnet3_ethtool.c | 22 ++++++++++++++++++++-- 1 file changed, 20 insertions(+), 2 deletions(-) diff --git a/drivers/net/vmxnet3/vmxnet3_ethtool.c b/drivers/net/vmxnet3/vmxnet3_ethtool.c index c0bd9cbc43b1d9..1b483cf2b1ca2d 100644 --- a/drivers/net/vmxnet3/vmxnet3_ethtool.c +++ b/drivers/net/vmxnet3/vmxnet3_ethtool.c @@ -1,7 +1,7 @@ /* * Linux driver for VMware's vmxnet3 ethernet NIC. * - * Copyright (C) 2008-2020, VMware, Inc. All Rights Reserved. + * Copyright (C) 2008-2021, VMware, Inc. All Rights Reserved. * * This program is free software; you can redistribute it and/or modify it * under the terms of the GNU General Public License as published by the @@ -26,6 +26,10 @@ #include "vmxnet3_int.h" +#include +#include + +#define VXLAN_UDP_PORT 8472 struct vmxnet3_stat_desc { char desc[ETH_GSTRING_LEN]; @@ -262,6 +266,8 @@ netdev_features_t vmxnet3_features_check(struct sk_buff *skb, if (VMXNET3_VERSION_GE_4(adapter) && skb->encapsulation && skb->ip_summed == CHECKSUM_PARTIAL) { u8 l4_proto = 0; + u16 port; + struct udphdr *udph; switch (vlan_get_protocol(skb)) { case htons(ETH_P_IP): @@ -274,8 +280,20 @@ netdev_features_t vmxnet3_features_check(struct sk_buff *skb, return features & ~(NETIF_F_CSUM_MASK | NETIF_F_GSO_MASK); } - if (l4_proto != IPPROTO_UDP) + switch (l4_proto) { + case IPPROTO_UDP: + udph = udp_hdr(skb); + port = be16_to_cpu(udph->dest); + /* Check if offloaded port is supported */ + if (port != GENEVE_UDP_PORT && + port != IANA_VXLAN_UDP_PORT && + port != VXLAN_UDP_PORT) { + return features & ~(NETIF_F_CSUM_MASK | NETIF_F_GSO_MASK); + } + break; + default: return features & ~(NETIF_F_CSUM_MASK | NETIF_F_GSO_MASK); + } } return features; } From b43c8909be52f2baca8884f967b418a88424494a Mon Sep 17 00:00:00 2001 From: Paolo Abeni Date: Sat, 3 Jul 2021 00:38:43 +0200 Subject: [PATCH 102/714] udp: properly flush normal packet at GRO time If an UDP packet enters the GRO engine but is not eligible for aggregation and is not targeting an UDP tunnel, udp_gro_receive() will not set the flush bit, and packet could delayed till the next napi flush. Fix the issue ensuring non GROed packets traverse skb_gro_flush_final(). Reported-and-tested-by: Matthias Treydte Fixes: 18f25dc39990 ("udp: skip L4 aggregation for UDP tunnel packets") Signed-off-by: Paolo Abeni Signed-off-by: David S. Miller --- net/ipv4/udp_offload.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/net/ipv4/udp_offload.c b/net/ipv4/udp_offload.c index 54e06b88af69a4..9dde1e5fb449b3 100644 --- a/net/ipv4/udp_offload.c +++ b/net/ipv4/udp_offload.c @@ -525,8 +525,10 @@ struct sk_buff *udp_gro_receive(struct list_head *head, struct sk_buff *skb, if ((!sk && (skb->dev->features & NETIF_F_GRO_UDP_FWD)) || (sk && udp_sk(sk)->gro_enabled) || NAPI_GRO_CB(skb)->is_flist) - pp = call_gro_receive(udp_gro_receive_segment, head, skb); - return pp; + return call_gro_receive(udp_gro_receive_segment, head, skb); + + /* no GRO, be sure flush the current packet */ + goto out; } if (NAPI_GRO_CB(skb)->encap_mark || From 6ff63a150b5556012589ae59efac1b5eeb7d32c3 Mon Sep 17 00:00:00 2001 From: Lorenzo Bianconi Date: Sat, 3 Jul 2021 21:17:27 +0200 Subject: [PATCH 103/714] net: marvell: always set skb_shared_info in mvneta_swbm_add_rx_fragment Always set skb_shared_info data structure in mvneta_swbm_add_rx_fragment routine even if the fragment contains only the ethernet FCS. Fixes: 039fbc47f9f1 ("net: mvneta: alloc skb_shared_info on the mvneta_rx_swbm stack") Signed-off-by: Lorenzo Bianconi Signed-off-by: David S. Miller --- drivers/net/ethernet/marvell/mvneta.c | 20 ++++++++++---------- 1 file changed, 10 insertions(+), 10 deletions(-) diff --git a/drivers/net/ethernet/marvell/mvneta.c b/drivers/net/ethernet/marvell/mvneta.c index 361bc4fbe20b31..76a7777c746dac 100644 --- a/drivers/net/ethernet/marvell/mvneta.c +++ b/drivers/net/ethernet/marvell/mvneta.c @@ -2299,19 +2299,19 @@ mvneta_swbm_add_rx_fragment(struct mvneta_port *pp, skb_frag_off_set(frag, pp->rx_offset_correction); skb_frag_size_set(frag, data_len); __skb_frag_set_page(frag, page); - - /* last fragment */ - if (len == *size) { - struct skb_shared_info *sinfo; - - sinfo = xdp_get_shared_info_from_buff(xdp); - sinfo->nr_frags = xdp_sinfo->nr_frags; - memcpy(sinfo->frags, xdp_sinfo->frags, - sinfo->nr_frags * sizeof(skb_frag_t)); - } } else { page_pool_put_full_page(rxq->page_pool, page, true); } + + /* last fragment */ + if (len == *size) { + struct skb_shared_info *sinfo; + + sinfo = xdp_get_shared_info_from_buff(xdp); + sinfo->nr_frags = xdp_sinfo->nr_frags; + memcpy(sinfo->frags, xdp_sinfo->frags, + sinfo->nr_frags * sizeof(skb_frag_t)); + } *size -= len; } From 55eac20617ca84129273ab248f4d7bfe456967de Mon Sep 17 00:00:00 2001 From: Yangbo Lu Date: Mon, 5 Jul 2021 16:53:06 +0800 Subject: [PATCH 104/714] ptp: fix NULL pointer dereference in ptp_clock_register Fix NULL pointer dereference in ptp_clock_register. The argument "parent" of ptp_clock_register may be NULL pointer. Fixes: 73f37068d540 ("ptp: support ptp physical/virtual clocks conversion") Reported-by: kernel test robot Signed-off-by: Yangbo Lu Signed-off-by: David S. Miller --- drivers/ptp/ptp_clock.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/ptp/ptp_clock.c b/drivers/ptp/ptp_clock.c index f012fa581cf412..ce6d9fc8560723 100644 --- a/drivers/ptp/ptp_clock.c +++ b/drivers/ptp/ptp_clock.c @@ -236,7 +236,7 @@ struct ptp_clock *ptp_clock_register(struct ptp_clock_info *info, } /* PTP virtual clock is being registered under physical clock */ - if (parent->class && parent->class->name && + if (parent && parent->class && parent->class->name && strcmp(parent->class->name, "ptp") == 0) ptp->is_virtual_clock = true; From f6a175cfcc8df578adfdf06b05c82b3b8c8b5cfd Mon Sep 17 00:00:00 2001 From: Yangbo Lu Date: Mon, 5 Jul 2021 17:46:17 +0800 Subject: [PATCH 105/714] ptp: fix format string mismatch in ptp_sysfs.c Fix format string mismatch in ptp_sysfs.c. Use %u for unsigned int. Fixes: 73f37068d540 ("ptp: support ptp physical/virtual clocks conversion") Reported-by: kernel test robot Signed-off-by: Yangbo Lu Signed-off-by: David S. Miller --- drivers/ptp/ptp_sysfs.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/ptp/ptp_sysfs.c b/drivers/ptp/ptp_sysfs.c index 6a36590ca77a5d..b3d96b74729256 100644 --- a/drivers/ptp/ptp_sysfs.c +++ b/drivers/ptp/ptp_sysfs.c @@ -179,7 +179,7 @@ static ssize_t n_vclocks_show(struct device *dev, if (mutex_lock_interruptible(&ptp->n_vclocks_mux)) return -ERESTARTSYS; - size = snprintf(page, PAGE_SIZE - 1, "%d\n", ptp->n_vclocks); + size = snprintf(page, PAGE_SIZE - 1, "%u\n", ptp->n_vclocks); mutex_unlock(&ptp->n_vclocks_mux); @@ -252,7 +252,7 @@ static ssize_t max_vclocks_show(struct device *dev, struct ptp_clock *ptp = dev_get_drvdata(dev); ssize_t size; - size = snprintf(page, PAGE_SIZE - 1, "%d\n", ptp->max_vclocks); + size = snprintf(page, PAGE_SIZE - 1, "%u\n", ptp->max_vclocks); return size; } From 81c52c42afd92b741289208c65e5063b9e23ffb4 Mon Sep 17 00:00:00 2001 From: Xiaoliang Yang Date: Mon, 5 Jul 2021 18:26:53 +0800 Subject: [PATCH 106/714] net: stmmac: separate the tas basetime calculation function Separate the TAS basetime calculation function so that it can be called by other functions. Signed-off-by: Xiaoliang Yang Signed-off-by: David S. Miller --- drivers/net/ethernet/stmicro/stmmac/stmmac.h | 3 ++ .../net/ethernet/stmicro/stmmac/stmmac_tc.c | 38 ++++++++++++------- 2 files changed, 28 insertions(+), 13 deletions(-) diff --git a/drivers/net/ethernet/stmicro/stmmac/stmmac.h b/drivers/net/ethernet/stmicro/stmmac/stmmac.h index e735134e848764..fcdb1d20389b34 100644 --- a/drivers/net/ethernet/stmicro/stmmac/stmmac.h +++ b/drivers/net/ethernet/stmicro/stmmac/stmmac.h @@ -349,6 +349,9 @@ void stmmac_enable_rx_queue(struct stmmac_priv *priv, u32 queue); void stmmac_disable_tx_queue(struct stmmac_priv *priv, u32 queue); void stmmac_enable_tx_queue(struct stmmac_priv *priv, u32 queue); int stmmac_xsk_wakeup(struct net_device *dev, u32 queue, u32 flags); +struct timespec64 stmmac_calc_tas_basetime(ktime_t old_base_time, + ktime_t current_time, + u64 cycle_time); #if IS_ENABLED(CONFIG_STMMAC_SELFTESTS) void stmmac_selftest_run(struct net_device *dev, diff --git a/drivers/net/ethernet/stmicro/stmmac/stmmac_tc.c b/drivers/net/ethernet/stmicro/stmmac/stmmac_tc.c index 92dab609d4f8d8..596626c71189c8 100644 --- a/drivers/net/ethernet/stmicro/stmmac/stmmac_tc.c +++ b/drivers/net/ethernet/stmicro/stmmac/stmmac_tc.c @@ -711,6 +711,29 @@ static int tc_setup_cls(struct stmmac_priv *priv, return ret; } +struct timespec64 stmmac_calc_tas_basetime(ktime_t old_base_time, + ktime_t current_time, + u64 cycle_time) +{ + struct timespec64 time; + + if (ktime_after(old_base_time, current_time)) { + time = ktime_to_timespec64(old_base_time); + } else { + s64 n; + ktime_t base_time; + + n = div64_s64(ktime_sub_ns(current_time, old_base_time), + cycle_time); + base_time = ktime_add_ns(old_base_time, + (n + 1) * cycle_time); + + time = ktime_to_timespec64(base_time); + } + + return time; +} + static int tc_setup_taprio(struct stmmac_priv *priv, struct tc_taprio_qopt_offload *qopt) { @@ -814,19 +837,8 @@ static int tc_setup_taprio(struct stmmac_priv *priv, /* Adjust for real system time */ priv->ptp_clock_ops.gettime64(&priv->ptp_clock_ops, ¤t_time); current_time_ns = timespec64_to_ktime(current_time); - if (ktime_after(qopt->base_time, current_time_ns)) { - time = ktime_to_timespec64(qopt->base_time); - } else { - ktime_t base_time; - s64 n; - - n = div64_s64(ktime_sub_ns(current_time_ns, qopt->base_time), - qopt->cycle_time); - base_time = ktime_add_ns(qopt->base_time, - (n + 1) * qopt->cycle_time); - - time = ktime_to_timespec64(base_time); - } + time = stmmac_calc_tas_basetime(qopt->base_time, current_time_ns, + qopt->cycle_time); priv->plat->est->btr[0] = (u32)time.tv_nsec; priv->plat->est->btr[1] = (u32)time.tv_sec; From b2aae654a4794ef898ad33a179f341eb610f6b85 Mon Sep 17 00:00:00 2001 From: Xiaoliang Yang Date: Mon, 5 Jul 2021 18:26:54 +0800 Subject: [PATCH 107/714] net: stmmac: add mutex lock to protect est parameters Add a mutex lock to protect est structure parameters so that the EST parameters can be updated by other threads. Signed-off-by: Xiaoliang Yang Signed-off-by: David S. Miller --- drivers/net/ethernet/stmicro/stmmac/stmmac_tc.c | 12 +++++++++++- include/linux/stmmac.h | 1 + 2 files changed, 12 insertions(+), 1 deletion(-) diff --git a/drivers/net/ethernet/stmicro/stmmac/stmmac_tc.c b/drivers/net/ethernet/stmicro/stmmac/stmmac_tc.c index 596626c71189c8..2e3cdf540168cb 100644 --- a/drivers/net/ethernet/stmicro/stmmac/stmmac_tc.c +++ b/drivers/net/ethernet/stmicro/stmmac/stmmac_tc.c @@ -796,14 +796,18 @@ static int tc_setup_taprio(struct stmmac_priv *priv, GFP_KERNEL); if (!plat->est) return -ENOMEM; + + mutex_init(&priv->plat->est->lock); } else { memset(plat->est, 0, sizeof(*plat->est)); } size = qopt->num_entries; + mutex_lock(&priv->plat->est->lock); priv->plat->est->gcl_size = size; priv->plat->est->enable = qopt->enable; + mutex_unlock(&priv->plat->est->lock); for (i = 0; i < size; i++) { s64 delta_ns = qopt->entries[i].interval; @@ -834,6 +838,7 @@ static int tc_setup_taprio(struct stmmac_priv *priv, priv->plat->est->gcl[i] = delta_ns | (gates << wid); } + mutex_lock(&priv->plat->est->lock); /* Adjust for real system time */ priv->ptp_clock_ops.gettime64(&priv->ptp_clock_ops, ¤t_time); current_time_ns = timespec64_to_ktime(current_time); @@ -847,8 +852,10 @@ static int tc_setup_taprio(struct stmmac_priv *priv, priv->plat->est->ctr[0] = do_div(ctr, NSEC_PER_SEC); priv->plat->est->ctr[1] = (u32)ctr; - if (fpe && !priv->dma_cap.fpesel) + if (fpe && !priv->dma_cap.fpesel) { + mutex_unlock(&priv->plat->est->lock); return -EOPNOTSUPP; + } /* Actual FPE register configuration will be done after FPE handshake * is success. @@ -857,6 +864,7 @@ static int tc_setup_taprio(struct stmmac_priv *priv, ret = stmmac_est_configure(priv, priv->ioaddr, priv->plat->est, priv->plat->clk_ptp_rate); + mutex_unlock(&priv->plat->est->lock); if (ret) { netdev_err(priv->dev, "failed to configure EST\n"); goto disable; @@ -872,9 +880,11 @@ static int tc_setup_taprio(struct stmmac_priv *priv, return 0; disable: + mutex_lock(&priv->plat->est->lock); priv->plat->est->enable = false; stmmac_est_configure(priv, priv->ioaddr, priv->plat->est, priv->plat->clk_ptp_rate); + mutex_unlock(&priv->plat->est->lock); priv->plat->fpe_cfg->enable = false; stmmac_fpe_configure(priv, priv->ioaddr, diff --git a/include/linux/stmmac.h b/include/linux/stmmac.h index d5ae621d66badd..09157b8a5810bd 100644 --- a/include/linux/stmmac.h +++ b/include/linux/stmmac.h @@ -115,6 +115,7 @@ struct stmmac_axi { #define EST_GCL 1024 struct stmmac_est { + struct mutex lock; int enable; u32 btr_offset[2]; u32 btr[2]; From e9e3720002f61cd637a49ecafae77cac230eefae Mon Sep 17 00:00:00 2001 From: Xiaoliang Yang Date: Mon, 5 Jul 2021 18:26:55 +0800 Subject: [PATCH 108/714] net: stmmac: ptp: update tas basetime after ptp adjust After adjusting the ptp time, the Qbv base time may be the past time of the new current time. dwmac5 hardware limited the base time cannot be set as past time. This patch add a btr_reserve to store the base time get from qopt, then calculate the base time and reset the Qbv configuration after ptp time adjust. Signed-off-by: Xiaoliang Yang Signed-off-by: David S. Miller --- .../net/ethernet/stmicro/stmmac/stmmac_ptp.c | 41 ++++++++++++++++++- .../net/ethernet/stmicro/stmmac/stmmac_tc.c | 6 ++- include/linux/stmmac.h | 1 + 3 files changed, 46 insertions(+), 2 deletions(-) diff --git a/drivers/net/ethernet/stmicro/stmmac/stmmac_ptp.c b/drivers/net/ethernet/stmicro/stmmac/stmmac_ptp.c index 4e86cdf2bc9f41..580cc035536bd8 100644 --- a/drivers/net/ethernet/stmicro/stmmac/stmmac_ptp.c +++ b/drivers/net/ethernet/stmicro/stmmac/stmmac_ptp.c @@ -62,7 +62,8 @@ static int stmmac_adjust_time(struct ptp_clock_info *ptp, s64 delta) u32 sec, nsec; u32 quotient, reminder; int neg_adj = 0; - bool xmac; + bool xmac, est_rst = false; + int ret; xmac = priv->plat->has_gmac4 || priv->plat->has_xgmac; @@ -75,10 +76,48 @@ static int stmmac_adjust_time(struct ptp_clock_info *ptp, s64 delta) sec = quotient; nsec = reminder; + /* If EST is enabled, disabled it before adjust ptp time. */ + if (priv->plat->est && priv->plat->est->enable) { + est_rst = true; + mutex_lock(&priv->plat->est->lock); + priv->plat->est->enable = false; + stmmac_est_configure(priv, priv->ioaddr, priv->plat->est, + priv->plat->clk_ptp_rate); + mutex_unlock(&priv->plat->est->lock); + } + spin_lock_irqsave(&priv->ptp_lock, flags); stmmac_adjust_systime(priv, priv->ptpaddr, sec, nsec, neg_adj, xmac); spin_unlock_irqrestore(&priv->ptp_lock, flags); + /* Caculate new basetime and re-configured EST after PTP time adjust. */ + if (est_rst) { + struct timespec64 current_time, time; + ktime_t current_time_ns, basetime; + u64 cycle_time; + + mutex_lock(&priv->plat->est->lock); + priv->ptp_clock_ops.gettime64(&priv->ptp_clock_ops, ¤t_time); + current_time_ns = timespec64_to_ktime(current_time); + time.tv_nsec = priv->plat->est->btr_reserve[0]; + time.tv_sec = priv->plat->est->btr_reserve[1]; + basetime = timespec64_to_ktime(time); + cycle_time = priv->plat->est->ctr[1] * NSEC_PER_SEC + + priv->plat->est->ctr[0]; + time = stmmac_calc_tas_basetime(basetime, + current_time_ns, + cycle_time); + + priv->plat->est->btr[0] = (u32)time.tv_nsec; + priv->plat->est->btr[1] = (u32)time.tv_sec; + priv->plat->est->enable = true; + ret = stmmac_est_configure(priv, priv->ioaddr, priv->plat->est, + priv->plat->clk_ptp_rate); + mutex_unlock(&priv->plat->est->lock); + if (ret) + netdev_err(priv->dev, "failed to configure EST\n"); + } + return 0; } diff --git a/drivers/net/ethernet/stmicro/stmmac/stmmac_tc.c b/drivers/net/ethernet/stmicro/stmmac/stmmac_tc.c index 2e3cdf540168cb..4f3b6437b11455 100644 --- a/drivers/net/ethernet/stmicro/stmmac/stmmac_tc.c +++ b/drivers/net/ethernet/stmicro/stmmac/stmmac_tc.c @@ -739,7 +739,7 @@ static int tc_setup_taprio(struct stmmac_priv *priv, { u32 size, wid = priv->dma_cap.estwid, dep = priv->dma_cap.estdep; struct plat_stmmacenet_data *plat = priv->plat; - struct timespec64 time, current_time; + struct timespec64 time, current_time, qopt_time; ktime_t current_time_ns; bool fpe = false; int i, ret = 0; @@ -848,6 +848,10 @@ static int tc_setup_taprio(struct stmmac_priv *priv, priv->plat->est->btr[0] = (u32)time.tv_nsec; priv->plat->est->btr[1] = (u32)time.tv_sec; + qopt_time = ktime_to_timespec64(qopt->base_time); + priv->plat->est->btr_reserve[0] = (u32)qopt_time.tv_nsec; + priv->plat->est->btr_reserve[1] = (u32)qopt_time.tv_sec; + ctr = qopt->cycle_time; priv->plat->est->ctr[0] = do_div(ctr, NSEC_PER_SEC); priv->plat->est->ctr[1] = (u32)ctr; diff --git a/include/linux/stmmac.h b/include/linux/stmmac.h index 09157b8a5810bd..a6f03b36fc4f76 100644 --- a/include/linux/stmmac.h +++ b/include/linux/stmmac.h @@ -117,6 +117,7 @@ struct stmmac_axi { struct stmmac_est { struct mutex lock; int enable; + u32 btr_reserve[2]; u32 btr_offset[2]; u32 btr[2]; u32 ctr[2]; From 5a0ae9872d5cb5f27590eed168d4b3b144350ed7 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Toke=20H=C3=B8iland-J=C3=B8rgensen?= Date: Mon, 5 Jul 2021 12:38:41 +0200 Subject: [PATCH 109/714] bpf, samples: Add -fno-asynchronous-unwind-tables to BPF Clang invocation MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The samples/bpf Makefile currently compiles BPF files in a way that will produce an .eh_frame section, which will in turn confuse libbpf and produce errors when loading BPF programs, like: libbpf: elf: skipping unrecognized data section(32) .eh_frame libbpf: elf: skipping relo section(33) .rel.eh_frame for section(32) .eh_frame Fix this by instruction Clang not to produce this section, as it's useless for BPF anyway. Suggested-by: Daniel Borkmann Signed-off-by: Toke Høiland-Jørgensen Signed-off-by: Daniel Borkmann Link: https://lore.kernel.org/bpf/20210705103841.180260-1-toke@redhat.com --- samples/bpf/Makefile | 1 + 1 file changed, 1 insertion(+) diff --git a/samples/bpf/Makefile b/samples/bpf/Makefile index 520434ea966ffe..036998d11ded1b 100644 --- a/samples/bpf/Makefile +++ b/samples/bpf/Makefile @@ -331,6 +331,7 @@ $(obj)/%.o: $(src)/%.c -Wno-gnu-variable-sized-type-not-at-end \ -Wno-address-of-packed-member -Wno-tautological-compare \ -Wno-unknown-warning-option $(CLANG_ARCH_ARGS) \ + -fno-asynchronous-unwind-tables \ -I$(srctree)/samples/bpf/ -include asm_goto_workaround.h \ -O2 -emit-llvm -Xclang -disable-llvm-passes -c $< -o - | \ $(OPT) -O2 -mtriple=bpf-pc-linux | $(LLVM_DIS) | \ From 2620e92ae6ed83260eb46d214554cd308ee35d92 Mon Sep 17 00:00:00 2001 From: Wang Hai Date: Mon, 28 Jun 2021 17:18:15 +0800 Subject: [PATCH 110/714] bpf, samples: Fix xdpsock with '-M' parameter missing unload process Execute the following command and exit, then execute it again, the following error will be reported: $ sudo ./samples/bpf/xdpsock -i ens4f2 -M ^C $ sudo ./samples/bpf/xdpsock -i ens4f2 -M libbpf: elf: skipping unrecognized data section(16) .eh_frame libbpf: elf: skipping relo section(17) .rel.eh_frame for section(16) .eh_frame libbpf: Kernel error message: XDP program already attached ERROR: link set xdp fd failed Commit c9d27c9e8dc7 ("samples: bpf: Do not unload prog within xdpsock") removed the unloading prog code because of the presence of bpf_link. This is fine if XDP_SHARED_UMEM is disabled, but if it is enabled, unloading the prog is still needed. Fixes: c9d27c9e8dc7 ("samples: bpf: Do not unload prog within xdpsock") Signed-off-by: Wang Hai Signed-off-by: Daniel Borkmann Acked-by: Magnus Karlsson Cc: Maciej Fijalkowski Link: https://lore.kernel.org/bpf/20210628091815.2373487-1-wanghai38@huawei.com --- samples/bpf/xdpsock_user.c | 28 ++++++++++++++++++++++++++++ 1 file changed, 28 insertions(+) diff --git a/samples/bpf/xdpsock_user.c b/samples/bpf/xdpsock_user.c index 53e300f860bb4f..33d0bdebbed81d 100644 --- a/samples/bpf/xdpsock_user.c +++ b/samples/bpf/xdpsock_user.c @@ -96,6 +96,7 @@ static int opt_xsk_frame_size = XSK_UMEM__DEFAULT_FRAME_SIZE; static int opt_timeout = 1000; static bool opt_need_wakeup = true; static u32 opt_num_xsks = 1; +static u32 prog_id; static bool opt_busy_poll; static bool opt_reduced_cap; @@ -461,6 +462,23 @@ static void *poller(void *arg) return NULL; } +static void remove_xdp_program(void) +{ + u32 curr_prog_id = 0; + + if (bpf_get_link_xdp_id(opt_ifindex, &curr_prog_id, opt_xdp_flags)) { + printf("bpf_get_link_xdp_id failed\n"); + exit(EXIT_FAILURE); + } + + if (prog_id == curr_prog_id) + bpf_set_link_xdp_fd(opt_ifindex, -1, opt_xdp_flags); + else if (!curr_prog_id) + printf("couldn't find a prog id on a given interface\n"); + else + printf("program on interface changed, not removing\n"); +} + static void int_exit(int sig) { benchmark_done = true; @@ -471,6 +489,9 @@ static void __exit_with_error(int error, const char *file, const char *func, { fprintf(stderr, "%s:%s:%i: errno: %d/\"%s\"\n", file, func, line, error, strerror(error)); + + if (opt_num_xsks > 1) + remove_xdp_program(); exit(EXIT_FAILURE); } @@ -490,6 +511,9 @@ static void xdpsock_cleanup(void) if (write(sock, &cmd, sizeof(int)) < 0) exit_with_error(errno); } + + if (opt_num_xsks > 1) + remove_xdp_program(); } static void swap_mac_addresses(void *data) @@ -857,6 +881,10 @@ static struct xsk_socket_info *xsk_configure_socket(struct xsk_umem_info *umem, if (ret) exit_with_error(-ret); + ret = bpf_get_link_xdp_id(opt_ifindex, &prog_id, opt_xdp_flags); + if (ret) + exit_with_error(-ret); + xsk->app_stats.rx_empty_polls = 0; xsk->app_stats.fill_fail_polls = 0; xsk->app_stats.copy_tx_sendtos = 0; From bc832065b60f973771ff3e657214bb21b559833c Mon Sep 17 00:00:00 2001 From: Gu Shengxian Date: Mon, 5 Jul 2021 18:35:43 -0700 Subject: [PATCH 111/714] bpftool: Properly close va_list 'ap' by va_end() on error va_list 'ap' was opened but not closed by va_end() in error case. It should be closed by va_end() before the return. Fixes: aa52bcbe0e72 ("tools: bpftool: Fix json dump crash on powerpc") Signed-off-by: Gu Shengxian Signed-off-by: Daniel Borkmann Cc: Jiri Olsa Link: https://lore.kernel.org/bpf/20210706013543.671114-1-gushengxian507419@gmail.com --- tools/bpf/bpftool/jit_disasm.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/tools/bpf/bpftool/jit_disasm.c b/tools/bpf/bpftool/jit_disasm.c index e7e7eee9f17250..24734f2249d6ec 100644 --- a/tools/bpf/bpftool/jit_disasm.c +++ b/tools/bpf/bpftool/jit_disasm.c @@ -43,11 +43,13 @@ static int fprintf_json(void *out, const char *fmt, ...) { va_list ap; char *s; + int err; va_start(ap, fmt); - if (vasprintf(&s, fmt, ap) < 0) - return -1; + err = vasprintf(&s, fmt, ap); va_end(ap); + if (err < 0) + return -1; if (!oper_count) { int i; From fb5dad4084f0ea6b6df5fe90f157531ca6e20681 Mon Sep 17 00:00:00 2001 From: Christian Borntraeger Date: Tue, 8 Jun 2021 14:39:54 +0200 Subject: [PATCH 112/714] KVM: selftests: introduce P44V64 for z196 and EC12 Older machines like z196 and zEC12 do only support 44 bits of physical addresses. Make this the default and check via IBC if we are on a later machine. We then add P47V64 as an additional model. Reviewed-by: David Hildenbrand Reviewed-by: Thomas Huth Reviewed-by: Cornelia Huck Signed-off-by: Christian Borntraeger Link: https://lore.kernel.org/kvm/20210701153853.33063-1-borntraeger@de.ibm.com/ Fixes: 1bc603af73dd ("KVM: selftests: introduce P47V64 for s390x") --- tools/testing/selftests/kvm/include/kvm_util.h | 3 ++- tools/testing/selftests/kvm/lib/guest_modes.c | 16 ++++++++++++++++ tools/testing/selftests/kvm/lib/kvm_util.c | 5 +++++ 3 files changed, 23 insertions(+), 1 deletion(-) diff --git a/tools/testing/selftests/kvm/include/kvm_util.h b/tools/testing/selftests/kvm/include/kvm_util.h index 35739567189e09..74d73532fce9e1 100644 --- a/tools/testing/selftests/kvm/include/kvm_util.h +++ b/tools/testing/selftests/kvm/include/kvm_util.h @@ -44,6 +44,7 @@ enum vm_guest_mode { VM_MODE_P40V48_64K, VM_MODE_PXXV48_4K, /* For 48bits VA but ANY bits PA */ VM_MODE_P47V64_4K, + VM_MODE_P44V64_4K, NUM_VM_MODES, }; @@ -61,7 +62,7 @@ enum vm_guest_mode { #elif defined(__s390x__) -#define VM_MODE_DEFAULT VM_MODE_P47V64_4K +#define VM_MODE_DEFAULT VM_MODE_P44V64_4K #define MIN_PAGE_SHIFT 12U #define ptes_per_page(page_size) ((page_size) / 16) diff --git a/tools/testing/selftests/kvm/lib/guest_modes.c b/tools/testing/selftests/kvm/lib/guest_modes.c index 25bff307c71f2a..c330f414ef96dc 100644 --- a/tools/testing/selftests/kvm/lib/guest_modes.c +++ b/tools/testing/selftests/kvm/lib/guest_modes.c @@ -22,6 +22,22 @@ void guest_modes_append_default(void) } } #endif +#ifdef __s390x__ + { + int kvm_fd, vm_fd; + struct kvm_s390_vm_cpu_processor info; + + kvm_fd = open_kvm_dev_path_or_exit(); + vm_fd = ioctl(kvm_fd, KVM_CREATE_VM, 0); + kvm_device_access(vm_fd, KVM_S390_VM_CPU_MODEL, + KVM_S390_VM_CPU_PROCESSOR, &info, false); + close(vm_fd); + close(kvm_fd); + /* Starting with z13 we have 47bits of physical address */ + if (info.ibc >= 0x30) + guest_mode_append(VM_MODE_P47V64_4K, true, true); + } +#endif } void for_each_guest_mode(void (*func)(enum vm_guest_mode, void *), void *arg) diff --git a/tools/testing/selftests/kvm/lib/kvm_util.c b/tools/testing/selftests/kvm/lib/kvm_util.c index a2b732cf96ea48..8606000c439e4b 100644 --- a/tools/testing/selftests/kvm/lib/kvm_util.c +++ b/tools/testing/selftests/kvm/lib/kvm_util.c @@ -176,6 +176,7 @@ const char *vm_guest_mode_string(uint32_t i) [VM_MODE_P40V48_64K] = "PA-bits:40, VA-bits:48, 64K pages", [VM_MODE_PXXV48_4K] = "PA-bits:ANY, VA-bits:48, 4K pages", [VM_MODE_P47V64_4K] = "PA-bits:47, VA-bits:64, 4K pages", + [VM_MODE_P44V64_4K] = "PA-bits:44, VA-bits:64, 4K pages", }; _Static_assert(sizeof(strings)/sizeof(char *) == NUM_VM_MODES, "Missing new mode strings?"); @@ -194,6 +195,7 @@ const struct vm_guest_mode_params vm_guest_mode_params[] = { { 40, 48, 0x10000, 16 }, { 0, 0, 0x1000, 12 }, { 47, 64, 0x1000, 12 }, + { 44, 64, 0x1000, 12 }, }; _Static_assert(sizeof(vm_guest_mode_params)/sizeof(struct vm_guest_mode_params) == NUM_VM_MODES, "Missing new mode params?"); @@ -282,6 +284,9 @@ struct kvm_vm *vm_create(enum vm_guest_mode mode, uint64_t phy_pages, int perm) case VM_MODE_P47V64_4K: vm->pgtable_levels = 5; break; + case VM_MODE_P44V64_4K: + vm->pgtable_levels = 5; + break; default: TEST_FAIL("Unknown guest mode, mode: 0x%x", mode); } From cd4220d23bf3f43cf720e82bdee681f383433ae2 Mon Sep 17 00:00:00 2001 From: Christian Borntraeger Date: Thu, 1 Jul 2021 17:42:24 +0200 Subject: [PATCH 113/714] KVM: selftests: do not require 64GB in set_memory_region_test Unless the user sets overcommit_memory or has plenty of swap, the latest changes to the testcase will result in ENOMEM failures for hosts with less than 64GB RAM. As we do not use much of the allocated memory, we can use MAP_NORESERVE to avoid this error. Cc: Zenghui Yu Cc: vkuznets@redhat.com Cc: wanghaibin.wang@huawei.com Cc: stable@vger.kernel.org Fixes: 309505dd5685 ("KVM: selftests: Fix mapping length truncation in m{,un}map()") Tested-by: Zenghui Yu Link: https://lore.kernel.org/kvm/20210701160425.33666-1-borntraeger@de.ibm.com/ Signed-off-by: Christian Borntraeger --- tools/testing/selftests/kvm/set_memory_region_test.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/tools/testing/selftests/kvm/set_memory_region_test.c b/tools/testing/selftests/kvm/set_memory_region_test.c index d8812f27648ca5..d31f54ac4e9824 100644 --- a/tools/testing/selftests/kvm/set_memory_region_test.c +++ b/tools/testing/selftests/kvm/set_memory_region_test.c @@ -377,7 +377,8 @@ static void test_add_max_memory_regions(void) (max_mem_slots - 1), MEM_REGION_SIZE >> 10); mem = mmap(NULL, (size_t)max_mem_slots * MEM_REGION_SIZE + alignment, - PROT_READ | PROT_WRITE, MAP_PRIVATE | MAP_ANONYMOUS, -1, 0); + PROT_READ | PROT_WRITE, + MAP_PRIVATE | MAP_ANONYMOUS | MAP_NORESERVE, -1, 0); TEST_ASSERT(mem != MAP_FAILED, "Failed to mmap() host"); mem_aligned = (void *)(((size_t) mem + alignment - 1) & ~(alignment - 1)); From c4edc3ccbc63947e697bd2e30afca8bfaa144998 Mon Sep 17 00:00:00 2001 From: Ali Abdallah Date: Thu, 20 May 2021 12:53:11 +0200 Subject: [PATCH 114/714] netfilter: conntrack: improve RST handling when tuple is re-used If we receive a SYN packet in original direction on an existing connection tracking entry, we let this SYN through because conntrack might be out-of-sync. Conntrack gets back in sync when server responds with SYN/ACK and state gets updated accordingly. However, if server replies with RST, this packet might be marked as INVALID because td_maxack value reflects the *old* conntrack state and not the state of the originator of the RST. Avoid td_maxack-based checks if previous packet was a SYN. Unfortunately that is not be enough: an out of order ACK in original direction updates last_index, so we still end up marking valid RST. Thus disable the sequence check when we are not in established state and the received RST has a sequence of 0. Because marking RSTs as invalid usually leads to unwanted timeouts, also skip RST sequence checks if a conntrack entry is already closing. Such entries can already be evicted via GC in case the table is full. Co-developed-by: Florian Westphal Signed-off-by: Florian Westphal Signed-off-by: Ali Abdallah Signed-off-by: Pablo Neira Ayuso --- net/netfilter/nf_conntrack_proto_tcp.c | 53 +++++++++++++++++--------- 1 file changed, 36 insertions(+), 17 deletions(-) diff --git a/net/netfilter/nf_conntrack_proto_tcp.c b/net/netfilter/nf_conntrack_proto_tcp.c index eb4de92077a8b5..b8ff67671e93a2 100644 --- a/net/netfilter/nf_conntrack_proto_tcp.c +++ b/net/netfilter/nf_conntrack_proto_tcp.c @@ -823,6 +823,22 @@ static noinline bool tcp_new(struct nf_conn *ct, const struct sk_buff *skb, return true; } +static bool tcp_can_early_drop(const struct nf_conn *ct) +{ + switch (ct->proto.tcp.state) { + case TCP_CONNTRACK_FIN_WAIT: + case TCP_CONNTRACK_LAST_ACK: + case TCP_CONNTRACK_TIME_WAIT: + case TCP_CONNTRACK_CLOSE: + case TCP_CONNTRACK_CLOSE_WAIT: + return true; + default: + break; + } + + return false; +} + /* Returns verdict for packet, or -1 for invalid. */ int nf_conntrack_tcp_packet(struct nf_conn *ct, struct sk_buff *skb, @@ -1030,9 +1046,28 @@ int nf_conntrack_tcp_packet(struct nf_conn *ct, if (index != TCP_RST_SET) break; - if (ct->proto.tcp.seen[!dir].flags & IP_CT_TCP_FLAG_MAXACK_SET) { + /* If we are closing, tuple might have been re-used already. + * last_index, last_ack, and all other ct fields used for + * sequence/window validation are outdated in that case. + * + * As the conntrack can already be expired by GC under pressure, + * just skip validation checks. + */ + if (tcp_can_early_drop(ct)) + goto in_window; + + /* td_maxack might be outdated if we let a SYN through earlier */ + if ((ct->proto.tcp.seen[!dir].flags & IP_CT_TCP_FLAG_MAXACK_SET) && + ct->proto.tcp.last_index != TCP_SYN_SET) { u32 seq = ntohl(th->seq); + /* If we are not in established state and SEQ=0 this is most + * likely an answer to a SYN we let go through above (last_index + * can be updated due to out-of-order ACKs). + */ + if (seq == 0 && !nf_conntrack_tcp_established(ct)) + break; + if (before(seq, ct->proto.tcp.seen[!dir].td_maxack)) { /* Invalid RST */ spin_unlock_bh(&ct->lock); @@ -1165,22 +1200,6 @@ int nf_conntrack_tcp_packet(struct nf_conn *ct, return NF_ACCEPT; } -static bool tcp_can_early_drop(const struct nf_conn *ct) -{ - switch (ct->proto.tcp.state) { - case TCP_CONNTRACK_FIN_WAIT: - case TCP_CONNTRACK_LAST_ACK: - case TCP_CONNTRACK_TIME_WAIT: - case TCP_CONNTRACK_CLOSE: - case TCP_CONNTRACK_CLOSE_WAIT: - return true; - default: - break; - } - - return false; -} - #if IS_ENABLED(CONFIG_NF_CT_NETLINK) #include From 1da4cd82dd180224503e745ccf3220e3490d8897 Mon Sep 17 00:00:00 2001 From: Ali Abdallah Date: Thu, 27 May 2021 09:19:06 +0200 Subject: [PATCH 115/714] netfilter: conntrack: add new sysctl to disable RST check This patch adds a new sysctl tcp_ignore_invalid_rst to disable marking out of segments RSTs as INVALID. Signed-off-by: Ali Abdallah Acked-by: Florian Westphal Signed-off-by: Pablo Neira Ayuso --- Documentation/networking/nf_conntrack-sysctl.rst | 6 ++++++ include/net/netns/conntrack.h | 1 + net/netfilter/nf_conntrack_proto_tcp.c | 6 +++++- net/netfilter/nf_conntrack_standalone.c | 10 ++++++++++ 4 files changed, 22 insertions(+), 1 deletion(-) diff --git a/Documentation/networking/nf_conntrack-sysctl.rst b/Documentation/networking/nf_conntrack-sysctl.rst index 0467b30e4abe7b..d31ed6c1cb0d12 100644 --- a/Documentation/networking/nf_conntrack-sysctl.rst +++ b/Documentation/networking/nf_conntrack-sysctl.rst @@ -110,6 +110,12 @@ nf_conntrack_tcp_be_liberal - BOOLEAN Be conservative in what you do, be liberal in what you accept from others. If it's non-zero, we mark only out of window RST segments as INVALID. +nf_conntrack_tcp_ignore_invalid_rst - BOOLEAN + - 0 - disabled (default) + - 1 - enabled + + If it's 1, we don't mark out of window RST segments as INVALID. + nf_conntrack_tcp_loose - BOOLEAN - 0 - disabled - not 0 - enabled (default) diff --git a/include/net/netns/conntrack.h b/include/net/netns/conntrack.h index c3094b83a5258d..37e5300c7e5a14 100644 --- a/include/net/netns/conntrack.h +++ b/include/net/netns/conntrack.h @@ -27,6 +27,7 @@ struct nf_tcp_net { u8 tcp_loose; u8 tcp_be_liberal; u8 tcp_max_retrans; + u8 tcp_ignore_invalid_rst; #if IS_ENABLED(CONFIG_NF_FLOW_TABLE) unsigned int offload_timeout; unsigned int offload_pickup; diff --git a/net/netfilter/nf_conntrack_proto_tcp.c b/net/netfilter/nf_conntrack_proto_tcp.c index b8ff67671e93a2..3259416f2ea4b1 100644 --- a/net/netfilter/nf_conntrack_proto_tcp.c +++ b/net/netfilter/nf_conntrack_proto_tcp.c @@ -1068,7 +1068,8 @@ int nf_conntrack_tcp_packet(struct nf_conn *ct, if (seq == 0 && !nf_conntrack_tcp_established(ct)) break; - if (before(seq, ct->proto.tcp.seen[!dir].td_maxack)) { + if (before(seq, ct->proto.tcp.seen[!dir].td_maxack) && + !tn->tcp_ignore_invalid_rst) { /* Invalid RST */ spin_unlock_bh(&ct->lock); nf_ct_l4proto_log_invalid(skb, ct, state, "invalid rst"); @@ -1466,6 +1467,9 @@ void nf_conntrack_tcp_init_net(struct net *net) */ tn->tcp_be_liberal = 0; + /* If it's non-zero, we turn off RST sequence number check */ + tn->tcp_ignore_invalid_rst = 0; + /* Max number of the retransmitted packets without receiving an (acceptable) * ACK from the destination. If this number is reached, a shorter timer * will be started. diff --git a/net/netfilter/nf_conntrack_standalone.c b/net/netfilter/nf_conntrack_standalone.c index f57a951c9b5e78..214d9f9e499bd0 100644 --- a/net/netfilter/nf_conntrack_standalone.c +++ b/net/netfilter/nf_conntrack_standalone.c @@ -579,6 +579,7 @@ enum nf_ct_sysctl_index { #endif NF_SYSCTL_CT_PROTO_TCP_LOOSE, NF_SYSCTL_CT_PROTO_TCP_LIBERAL, + NF_SYSCTL_CT_PROTO_TCP_IGNORE_INVALID_RST, NF_SYSCTL_CT_PROTO_TCP_MAX_RETRANS, NF_SYSCTL_CT_PROTO_TIMEOUT_UDP, NF_SYSCTL_CT_PROTO_TIMEOUT_UDP_STREAM, @@ -798,6 +799,14 @@ static struct ctl_table nf_ct_sysctl_table[] = { .extra1 = SYSCTL_ZERO, .extra2 = SYSCTL_ONE, }, + [NF_SYSCTL_CT_PROTO_TCP_IGNORE_INVALID_RST] = { + .procname = "nf_conntrack_tcp_ignore_invalid_rst", + .maxlen = sizeof(u8), + .mode = 0644, + .proc_handler = proc_dou8vec_minmax, + .extra1 = SYSCTL_ZERO, + .extra2 = SYSCTL_ONE, + }, [NF_SYSCTL_CT_PROTO_TCP_MAX_RETRANS] = { .procname = "nf_conntrack_tcp_max_retrans", .maxlen = sizeof(u8), @@ -1004,6 +1013,7 @@ static void nf_conntrack_standalone_init_tcp_sysctl(struct net *net, XASSIGN(LOOSE, &tn->tcp_loose); XASSIGN(LIBERAL, &tn->tcp_be_liberal); XASSIGN(MAX_RETRANS, &tn->tcp_max_retrans); + XASSIGN(IGNORE_INVALID_RST, &tn->tcp_ignore_invalid_rst); #undef XASSIGN #if IS_ENABLED(CONFIG_NF_FLOW_TABLE) From cf4466ea47db891be785f867ca7f99e0cd9898c6 Mon Sep 17 00:00:00 2001 From: Manfred Spraul Date: Sun, 27 Jun 2021 18:19:18 +0200 Subject: [PATCH 116/714] netfilter: conntrack: Mark access for KCSAN KCSAN detected an data race with ipc/sem.c that is intentional. As nf_conntrack_lock() uses the same algorithm: Update nf_conntrack_core as well: nf_conntrack_lock() contains a1) spin_lock() a2) smp_load_acquire(nf_conntrack_locks_all). a1) actually accesses one lock from an array of locks. nf_conntrack_locks_all() contains b1) nf_conntrack_locks_all=true (normal write) b2) spin_lock() b3) spin_unlock() b2 and b3 are done for every lock. This guarantees that nf_conntrack_locks_all() prevents any concurrent nf_conntrack_lock() owners: If a thread past a1), then b2) will block until that thread releases the lock. If the threat is before a1, then b3)+a1) ensure the write b1) is visible, thus a2) is guaranteed to see the updated value. But: This is only the latest time when b1) becomes visible. It may also happen that b1) is visible an undefined amount of time before the b3). And thus KCSAN will notice a data race. In addition, the compiler might be too clever. Solution: Use WRITE_ONCE(). Signed-off-by: Manfred Spraul Signed-off-by: Pablo Neira Ayuso --- net/netfilter/nf_conntrack_core.c | 10 +++++++++- 1 file changed, 9 insertions(+), 1 deletion(-) diff --git a/net/netfilter/nf_conntrack_core.c b/net/netfilter/nf_conntrack_core.c index 085a11f1eb43fd..83c52df8587014 100644 --- a/net/netfilter/nf_conntrack_core.c +++ b/net/netfilter/nf_conntrack_core.c @@ -149,7 +149,15 @@ static void nf_conntrack_all_lock(void) spin_lock(&nf_conntrack_locks_all_lock); - nf_conntrack_locks_all = true; + /* For nf_contrack_locks_all, only the latest time when another + * CPU will see an update is controlled, by the "release" of the + * spin_lock below. + * The earliest time is not controlled, an thus KCSAN could detect + * a race when nf_conntract_lock() reads the variable. + * WRITE_ONCE() is used to ensure the compiler will not + * optimize the write. + */ + WRITE_ONCE(nf_conntrack_locks_all, true); for (i = 0; i < CONNTRACK_LOCKS; i++) { spin_lock(&nf_conntrack_locks[i]); From 6ac4bac4ce48604cf0f4b04d61884552520ca55e Mon Sep 17 00:00:00 2001 From: Pablo Neira Ayuso Date: Mon, 5 Jul 2021 17:45:36 +0200 Subject: [PATCH 117/714] netfilter: nft_last: honor NFTA_LAST_SET on restoration NFTA_LAST_SET tells us if this expression has ever seen a packet, do not ignore this attribute when restoring the ruleset. Fixes: 836382dc2471 ("netfilter: nf_tables: add last expression") Signed-off-by: Pablo Neira Ayuso --- net/netfilter/nft_last.c | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) diff --git a/net/netfilter/nft_last.c b/net/netfilter/nft_last.c index 913ac45167f245..bbb352b64c7378 100644 --- a/net/netfilter/nft_last.c +++ b/net/netfilter/nft_last.c @@ -23,15 +23,21 @@ static int nft_last_init(const struct nft_ctx *ctx, const struct nft_expr *expr, { struct nft_last_priv *priv = nft_expr_priv(expr); u64 last_jiffies; + u32 last_set = 0; int err; - if (tb[NFTA_LAST_MSECS]) { + if (tb[NFTA_LAST_SET]) { + last_set = ntohl(nla_get_be32(tb[NFTA_LAST_SET])); + if (last_set == 1) + priv->last_set = 1; + } + + if (last_set && tb[NFTA_LAST_MSECS]) { err = nf_msecs_to_jiffies64(tb[NFTA_LAST_MSECS], &last_jiffies); if (err < 0) return err; priv->last_jiffies = jiffies + (unsigned long)last_jiffies; - priv->last_set = 1; } return 0; From d1b5b80da7058883758df2b5b7f506d4d4f9a5fa Mon Sep 17 00:00:00 2001 From: Pablo Neira Ayuso Date: Mon, 5 Jul 2021 20:14:21 +0200 Subject: [PATCH 118/714] netfilter: nft_last: incorrect arithmetics when restoring last used Subtract the jiffies that have passed by to current jiffies to fix last used restoration. Fixes: 836382dc2471 ("netfilter: nf_tables: add last expression") Signed-off-by: Pablo Neira Ayuso --- net/netfilter/nft_last.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/netfilter/nft_last.c b/net/netfilter/nft_last.c index bbb352b64c7378..8088b99f2ee3e4 100644 --- a/net/netfilter/nft_last.c +++ b/net/netfilter/nft_last.c @@ -37,7 +37,7 @@ static int nft_last_init(const struct nft_ctx *ctx, const struct nft_expr *expr, if (err < 0) return err; - priv->last_jiffies = jiffies + (unsigned long)last_jiffies; + priv->last_jiffies = jiffies - (unsigned long)last_jiffies; } return 0; From 8550ff8d8c75416e984d9c4b082845e57e560984 Mon Sep 17 00:00:00 2001 From: Paul Blakey Date: Mon, 5 Jul 2021 13:54:51 +0300 Subject: [PATCH 119/714] skbuff: Release nfct refcount on napi stolen or re-used skbs When multiple SKBs are merged to a new skb under napi GRO, or SKB is re-used by napi, if nfct was set for them in the driver, it will not be released while freeing their stolen head state or on re-use. Release nfct on napi's stolen or re-used SKBs, and in gro_list_prepare, check conntrack metadata diff. Fixes: 5c6b94604744 ("net/mlx5e: CT: Handle misses after executing CT action") Reviewed-by: Roi Dayan Signed-off-by: Paul Blakey Signed-off-by: David S. Miller --- net/core/dev.c | 13 +++++++++++++ net/core/skbuff.c | 1 + 2 files changed, 14 insertions(+) diff --git a/net/core/dev.c b/net/core/dev.c index c253c2aafe97cd..177a5aec0b6bef 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -6008,6 +6008,18 @@ static void gro_list_prepare(const struct list_head *head, diffs = memcmp(skb_mac_header(p), skb_mac_header(skb), maclen); + + diffs |= skb_get_nfct(p) ^ skb_get_nfct(skb); + + if (!diffs) { + struct tc_skb_ext *skb_ext = skb_ext_find(skb, TC_SKB_EXT); + struct tc_skb_ext *p_ext = skb_ext_find(p, TC_SKB_EXT); + + diffs |= (!!p_ext) ^ (!!skb_ext); + if (!diffs && unlikely(skb_ext)) + diffs |= p_ext->chain ^ skb_ext->chain; + } + NAPI_GRO_CB(p)->same_flow = !diffs; } } @@ -6270,6 +6282,7 @@ static void napi_reuse_skb(struct napi_struct *napi, struct sk_buff *skb) skb_shinfo(skb)->gso_type = 0; skb->truesize = SKB_TRUESIZE(skb_end_offset(skb)); skb_ext_reset(skb); + nf_reset_ct(skb); napi->skb = skb; } diff --git a/net/core/skbuff.c b/net/core/skbuff.c index 12aabcda6db20e..f63de967ac2593 100644 --- a/net/core/skbuff.c +++ b/net/core/skbuff.c @@ -943,6 +943,7 @@ void __kfree_skb_defer(struct sk_buff *skb) void napi_skb_free_stolen_head(struct sk_buff *skb) { + nf_reset_ct(skb); skb_dst_drop(skb); skb_ext_put(skb); napi_skb_cache_put(skb); From be5d1b61a2ad28c7e57fe8bfa277373e8ecffcdc Mon Sep 17 00:00:00 2001 From: Nguyen Dinh Phi Date: Tue, 6 Jul 2021 07:19:12 +0800 Subject: [PATCH 120/714] tcp: fix tcp_init_transfer() to not reset icsk_ca_initialized This commit fixes a bug (found by syzkaller) that could cause spurious double-initializations for congestion control modules, which could cause memory leaks or other problems for congestion control modules (like CDG) that allocate memory in their init functions. The buggy scenario constructed by syzkaller was something like: (1) create a TCP socket (2) initiate a TFO connect via sendto() (3) while socket is in TCP_SYN_SENT, call setsockopt(TCP_CONGESTION), which calls: tcp_set_congestion_control() -> tcp_reinit_congestion_control() -> tcp_init_congestion_control() (4) receive ACK, connection is established, call tcp_init_transfer(), set icsk_ca_initialized=0 (without first calling cc->release()), call tcp_init_congestion_control() again. Note that in this sequence tcp_init_congestion_control() is called twice without a cc->release() call in between. Thus, for CC modules that allocate memory in their init() function, e.g, CDG, a memory leak may occur. The syzkaller tool managed to find a reproducer that triggered such a leak in CDG. The bug was introduced when that commit 8919a9b31eb4 ("tcp: Only init congestion control if not initialized already") introduced icsk_ca_initialized and set icsk_ca_initialized to 0 in tcp_init_transfer(), missing the possibility for a sequence like the one above, where a process could call setsockopt(TCP_CONGESTION) in state TCP_SYN_SENT (i.e. after the connect() or TFO open sendmsg()), which would call tcp_init_congestion_control(). It did not intend to reset any initialization that the user had already explicitly made; it just missed the possibility of that particular sequence (which syzkaller managed to find). Fixes: 8919a9b31eb4 ("tcp: Only init congestion control if not initialized already") Reported-by: syzbot+f1e24a0594d4e3a895d3@syzkaller.appspotmail.com Signed-off-by: Nguyen Dinh Phi Acked-by: Neal Cardwell Tested-by: Neal Cardwell Signed-off-by: David S. Miller --- net/ipv4/tcp_input.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c index e6ca5a1f3b595b..a5a8d0a378b23f 100644 --- a/net/ipv4/tcp_input.c +++ b/net/ipv4/tcp_input.c @@ -5922,8 +5922,8 @@ void tcp_init_transfer(struct sock *sk, int bpf_op, struct sk_buff *skb) tp->snd_cwnd = tcp_init_cwnd(tp, __sk_dst_get(sk)); tp->snd_cwnd_stamp = tcp_jiffies32; - icsk->icsk_ca_initialized = 0; bpf_skops_established(sk, bpf_op, skb); + /* Initialize congestion control unless BPF initialized it already: */ if (!icsk->icsk_ca_initialized) tcp_init_congestion_control(sk); tcp_init_buffer_space(sk); From b648eba4c69e5819880b4907e7fcb2bb576069ab Mon Sep 17 00:00:00 2001 From: Taehee Yoo Date: Mon, 5 Jul 2021 15:38:06 +0000 Subject: [PATCH 121/714] bonding: fix suspicious RCU usage in bond_ipsec_add_sa() To dereference bond->curr_active_slave, it uses rcu_dereference(). But it and the caller doesn't acquire RCU so a warning occurs. So add rcu_read_lock(). Test commands: ip link add dummy0 type dummy ip link add bond0 type bond ip link set dummy0 master bond0 ip link set dummy0 up ip link set bond0 up ip x s add proto esp dst 14.1.1.1 src 15.1.1.1 spi 0x07 \ mode transport \ reqid 0x07 replay-window 32 aead 'rfc4106(gcm(aes))' \ 0x44434241343332312423222114131211f4f3f2f1 128 sel \ src 14.0.0.52/24 dst 14.0.0.70/24 proto tcp offload \ dev bond0 dir in Splat looks like: ============================= WARNING: suspicious RCU usage 5.13.0-rc3+ #1168 Not tainted ----------------------------- drivers/net/bonding/bond_main.c:411 suspicious rcu_dereference_check() usage! other info that might help us debug this: rcu_scheduler_active = 2, debug_locks = 1 1 lock held by ip/684: #0: ffffffff9a2757c0 (&net->xfrm.xfrm_cfg_mutex){+.+.}-{3:3}, at: xfrm_netlink_rcv+0x59/0x80 [xfrm_user] 55.191733][ T684] stack backtrace: CPU: 0 PID: 684 Comm: ip Not tainted 5.13.0-rc3+ #1168 Call Trace: dump_stack+0xa4/0xe5 bond_ipsec_add_sa+0x18c/0x1f0 [bonding] xfrm_dev_state_add+0x2a9/0x770 ? memcpy+0x38/0x60 xfrm_add_sa+0x2278/0x3b10 [xfrm_user] ? xfrm_get_policy+0xaa0/0xaa0 [xfrm_user] ? register_lock_class+0x1750/0x1750 xfrm_user_rcv_msg+0x331/0x660 [xfrm_user] ? rcu_read_lock_sched_held+0x91/0xc0 ? xfrm_user_state_lookup.constprop.39+0x320/0x320 [xfrm_user] ? find_held_lock+0x3a/0x1c0 ? mutex_lock_io_nested+0x1210/0x1210 ? sched_clock_cpu+0x18/0x170 netlink_rcv_skb+0x121/0x350 ? xfrm_user_state_lookup.constprop.39+0x320/0x320 [xfrm_user] ? netlink_ack+0x9d0/0x9d0 ? netlink_deliver_tap+0x17c/0xa50 xfrm_netlink_rcv+0x68/0x80 [xfrm_user] netlink_unicast+0x41c/0x610 ? netlink_attachskb+0x710/0x710 netlink_sendmsg+0x6b9/0xb70 [ ... ] Fixes: 18cb261afd7b ("bonding: support hardware encryption offload to slaves") Signed-off-by: Taehee Yoo Signed-off-by: David S. Miller --- drivers/net/bonding/bond_main.c | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/drivers/net/bonding/bond_main.c b/drivers/net/bonding/bond_main.c index 0ff7567bd04f07..d4d718e04dcc49 100644 --- a/drivers/net/bonding/bond_main.c +++ b/drivers/net/bonding/bond_main.c @@ -403,10 +403,12 @@ static int bond_ipsec_add_sa(struct xfrm_state *xs) struct net_device *bond_dev = xs->xso.dev; struct bonding *bond; struct slave *slave; + int err; if (!bond_dev) return -EINVAL; + rcu_read_lock(); bond = netdev_priv(bond_dev); slave = rcu_dereference(bond->curr_active_slave); xs->xso.real_dev = slave->dev; @@ -415,10 +417,13 @@ static int bond_ipsec_add_sa(struct xfrm_state *xs) if (!(slave->dev->xfrmdev_ops && slave->dev->xfrmdev_ops->xdo_dev_state_add)) { slave_warn(bond_dev, slave->dev, "Slave does not support ipsec offload\n"); + rcu_read_unlock(); return -EINVAL; } - return slave->dev->xfrmdev_ops->xdo_dev_state_add(xs); + err = slave->dev->xfrmdev_ops->xdo_dev_state_add(xs); + rcu_read_unlock(); + return err; } /** From 105cd17a866017b45f3c45901b394c711c97bf40 Mon Sep 17 00:00:00 2001 From: Taehee Yoo Date: Mon, 5 Jul 2021 15:38:07 +0000 Subject: [PATCH 122/714] bonding: fix null dereference in bond_ipsec_add_sa() If bond doesn't have real device, bond->curr_active_slave is null. But bond_ipsec_add_sa() dereferences bond->curr_active_slave without null checking. So, null-ptr-deref would occur. Test commands: ip link add bond0 type bond ip link set bond0 up ip x s add proto esp dst 14.1.1.1 src 15.1.1.1 spi \ 0x07 mode transport reqid 0x07 replay-window 32 aead 'rfc4106(gcm(aes))' \ 0x44434241343332312423222114131211f4f3f2f1 128 sel src 14.0.0.52/24 \ dst 14.0.0.70/24 proto tcp offload dev bond0 dir in Splat looks like: KASAN: null-ptr-deref in range [0x0000000000000000-0x0000000000000007] CPU: 4 PID: 680 Comm: ip Not tainted 5.13.0-rc3+ #1168 RIP: 0010:bond_ipsec_add_sa+0xc4/0x2e0 [bonding] Code: 85 21 02 00 00 4d 8b a6 48 0c 00 00 e8 75 58 44 ce 85 c0 0f 85 14 01 00 00 48 b8 00 00 00 00 00 fc ff df 4c 89 e2 48 c1 ea 03 <80> 3c 02 00 0f 85 fc 01 00 00 48 8d bb e0 02 00 00 4d 8b 2c 24 48 RSP: 0018:ffff88810946f508 EFLAGS: 00010246 RAX: dffffc0000000000 RBX: ffff88810b4e8040 RCX: 0000000000000001 RDX: 0000000000000000 RSI: ffffffff8fe34280 RDI: ffff888115abe100 RBP: ffff88810946f528 R08: 0000000000000003 R09: fffffbfff2287e11 R10: 0000000000000001 R11: ffff888115abe0c8 R12: 0000000000000000 R13: ffffffffc0aea9a0 R14: ffff88800d7d2000 R15: ffff88810b4e8330 FS: 00007efc5552e680(0000) GS:ffff888119c00000(0000) knlGS:0000000000000000 CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 CR2: 000055c2530dbf40 CR3: 0000000103056004 CR4: 00000000003706e0 DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000 DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400 Call Trace: xfrm_dev_state_add+0x2a9/0x770 ? memcpy+0x38/0x60 xfrm_add_sa+0x2278/0x3b10 [xfrm_user] ? xfrm_get_policy+0xaa0/0xaa0 [xfrm_user] ? register_lock_class+0x1750/0x1750 xfrm_user_rcv_msg+0x331/0x660 [xfrm_user] ? rcu_read_lock_sched_held+0x91/0xc0 ? xfrm_user_state_lookup.constprop.39+0x320/0x320 [xfrm_user] ? find_held_lock+0x3a/0x1c0 ? mutex_lock_io_nested+0x1210/0x1210 ? sched_clock_cpu+0x18/0x170 netlink_rcv_skb+0x121/0x350 ? xfrm_user_state_lookup.constprop.39+0x320/0x320 [xfrm_user] ? netlink_ack+0x9d0/0x9d0 ? netlink_deliver_tap+0x17c/0xa50 xfrm_netlink_rcv+0x68/0x80 [xfrm_user] netlink_unicast+0x41c/0x610 ? netlink_attachskb+0x710/0x710 netlink_sendmsg+0x6b9/0xb70 [ ...] Fixes: 18cb261afd7b ("bonding: support hardware encryption offload to slaves") Signed-off-by: Taehee Yoo Signed-off-by: David S. Miller --- drivers/net/bonding/bond_main.c | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/drivers/net/bonding/bond_main.c b/drivers/net/bonding/bond_main.c index d4d718e04dcc49..5466b24ceab6b5 100644 --- a/drivers/net/bonding/bond_main.c +++ b/drivers/net/bonding/bond_main.c @@ -411,6 +411,11 @@ static int bond_ipsec_add_sa(struct xfrm_state *xs) rcu_read_lock(); bond = netdev_priv(bond_dev); slave = rcu_dereference(bond->curr_active_slave); + if (!slave) { + rcu_read_unlock(); + return -ENODEV; + } + xs->xso.real_dev = slave->dev; bond->xs = xs; From 09adf7566d436322ced595b166dea48b06852efe Mon Sep 17 00:00:00 2001 From: Taehee Yoo Date: Mon, 5 Jul 2021 15:38:08 +0000 Subject: [PATCH 123/714] net: netdevsim: use xso.real_dev instead of xso.dev in callback functions of struct xfrmdev_ops There are two pointers in struct xfrm_state_offload, *dev, *real_dev. These are used in callback functions of struct xfrmdev_ops. The *dev points whether bonding interface or real interface. If bonding ipsec offload is used, it points bonding interface If not, it points real interface. And real_dev always points real interface. So, netdevsim should always use real_dev instead of dev. Of course, real_dev always not be null. Test commands: ip netns add A ip netns exec A bash modprobe netdevsim echo "1 1" > /sys/bus/netdevsim/new_device ip link add bond0 type bond mode active-backup ip link set eth0 master bond0 ip link set eth0 up ip link set bond0 up ip x s add proto esp dst 14.1.1.1 src 15.1.1.1 spi 0x07 mode \ transport reqid 0x07 replay-window 32 aead 'rfc4106(gcm(aes))' \ 0x44434241343332312423222114131211f4f3f2f1 128 sel src 14.0.0.52/24 \ dst 14.0.0.70/24 proto tcp offload dev bond0 dir in Splat looks like: BUG: spinlock bad magic on CPU#5, kworker/5:1/53 lock: 0xffff8881068c2cc8, .magic: 11121314, .owner: /-1, .owner_cpu: -235736076 CPU: 5 PID: 53 Comm: kworker/5:1 Not tainted 5.13.0-rc3+ #1168 Workqueue: events linkwatch_event Call Trace: dump_stack+0xa4/0xe5 do_raw_spin_lock+0x20b/0x270 ? rwlock_bug.part.1+0x90/0x90 _raw_spin_lock_nested+0x5f/0x70 bond_get_stats+0xe4/0x4c0 [bonding] ? rcu_read_lock_sched_held+0xc0/0xc0 ? bond_neigh_init+0x2c0/0x2c0 [bonding] ? dev_get_alias+0xe2/0x190 ? dev_get_port_parent_id+0x14a/0x360 ? rtnl_unregister+0x190/0x190 ? dev_get_phys_port_name+0xa0/0xa0 ? memset+0x1f/0x40 ? memcpy+0x38/0x60 ? rtnl_phys_switch_id_fill+0x91/0x100 dev_get_stats+0x8c/0x270 rtnl_fill_stats+0x44/0xbe0 ? nla_put+0xbe/0x140 rtnl_fill_ifinfo+0x1054/0x3ad0 [ ... ] Fixes: 272c2330adc9 ("xfrm: bail early on slave pass over skb") Signed-off-by: Taehee Yoo Signed-off-by: David S. Miller --- drivers/net/netdevsim/ipsec.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/drivers/net/netdevsim/ipsec.c b/drivers/net/netdevsim/ipsec.c index 3811f1bde84e70..b80ed2ffd45eb5 100644 --- a/drivers/net/netdevsim/ipsec.c +++ b/drivers/net/netdevsim/ipsec.c @@ -85,7 +85,7 @@ static int nsim_ipsec_parse_proto_keys(struct xfrm_state *xs, u32 *mykey, u32 *mysalt) { const char aes_gcm_name[] = "rfc4106(gcm(aes))"; - struct net_device *dev = xs->xso.dev; + struct net_device *dev = xs->xso.real_dev; unsigned char *key_data; char *alg_name = NULL; int key_len; @@ -134,7 +134,7 @@ static int nsim_ipsec_add_sa(struct xfrm_state *xs) u16 sa_idx; int ret; - dev = xs->xso.dev; + dev = xs->xso.real_dev; ns = netdev_priv(dev); ipsec = &ns->ipsec; @@ -194,7 +194,7 @@ static int nsim_ipsec_add_sa(struct xfrm_state *xs) static void nsim_ipsec_del_sa(struct xfrm_state *xs) { - struct netdevsim *ns = netdev_priv(xs->xso.dev); + struct netdevsim *ns = netdev_priv(xs->xso.real_dev); struct nsim_ipsec *ipsec = &ns->ipsec; u16 sa_idx; @@ -211,7 +211,7 @@ static void nsim_ipsec_del_sa(struct xfrm_state *xs) static bool nsim_ipsec_offload_ok(struct sk_buff *skb, struct xfrm_state *xs) { - struct netdevsim *ns = netdev_priv(xs->xso.dev); + struct netdevsim *ns = netdev_priv(xs->xso.real_dev); struct nsim_ipsec *ipsec = &ns->ipsec; ipsec->ok++; From 2de7e4f67599affc97132bd07e30e3bd59d0b777 Mon Sep 17 00:00:00 2001 From: Taehee Yoo Date: Mon, 5 Jul 2021 15:38:09 +0000 Subject: [PATCH 124/714] ixgbevf: use xso.real_dev instead of xso.dev in callback functions of struct xfrmdev_ops There are two pointers in struct xfrm_state_offload, *dev, *real_dev. These are used in callback functions of struct xfrmdev_ops. The *dev points whether bonding interface or real interface. If bonding ipsec offload is used, it points bonding interface If not, it points real interface. And real_dev always points real interface. So, ixgbevf should always use real_dev instead of dev. Of course, real_dev always not be null. Test commands: ip link add bond0 type bond #eth0 is ixgbevf interface ip link set eth0 master bond0 ip link set bond0 up ip x s add proto esp dst 14.1.1.1 src 15.1.1.1 spi 0x07 mode \ transport reqid 0x07 replay-window 32 aead 'rfc4106(gcm(aes))' \ 0x44434241343332312423222114131211f4f3f2f1 128 sel src 14.0.0.52/24 \ dst 14.0.0.70/24 proto tcp offload dev bond0 dir in Splat looks like: KASAN: null-ptr-deref in range [0x0000000000000000-0x0000000000000007] CPU: 6 PID: 688 Comm: ip Not tainted 5.13.0-rc3+ #1168 RIP: 0010:ixgbevf_ipsec_find_empty_idx+0x28/0x1b0 [ixgbevf] Code: 00 00 0f 1f 44 00 00 55 53 48 89 fb 48 83 ec 08 40 84 f6 0f 84 9c 00 00 00 48 b8 00 00 00 00 00 fc ff df 48 89 fa 48 c1 ea 03 <0f> b6 04 02 84 c0 74 08 3c 01 0f 8e 4c 01 00 00 66 81 3b 00 04 0f RSP: 0018:ffff8880089af390 EFLAGS: 00010246 RAX: dffffc0000000000 RBX: 0000000000000000 RCX: 0000000000000001 RDX: 0000000000000000 RSI: 0000000000000001 RDI: 0000000000000000 RBP: ffff8880089af4f8 R08: 0000000000000003 R09: fffffbfff4287e11 R10: 0000000000000001 R11: ffff888005de8908 R12: 0000000000000000 R13: ffff88810936a000 R14: ffff88810936a000 R15: ffff888004d78040 FS: 00007fdf9883a680(0000) GS:ffff88811a400000(0000) knlGS:0000000000000000 CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 CR2: 000055bc14adbf40 CR3: 000000000b87c005 CR4: 00000000003706e0 DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000 DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400 Call Trace: ixgbevf_ipsec_add_sa+0x1bf/0x9c0 [ixgbevf] ? rcu_read_lock_sched_held+0x91/0xc0 ? ixgbevf_ipsec_parse_proto_keys.isra.9+0x280/0x280 [ixgbevf] ? lock_acquire+0x191/0x720 ? bond_ipsec_add_sa+0x48/0x350 [bonding] ? lockdep_hardirqs_on_prepare+0x3e0/0x3e0 ? rcu_read_lock_held+0x91/0xa0 ? rcu_read_lock_sched_held+0xc0/0xc0 bond_ipsec_add_sa+0x193/0x350 [bonding] xfrm_dev_state_add+0x2a9/0x770 ? memcpy+0x38/0x60 xfrm_add_sa+0x2278/0x3b10 [xfrm_user] ? xfrm_get_policy+0xaa0/0xaa0 [xfrm_user] ? register_lock_class+0x1750/0x1750 xfrm_user_rcv_msg+0x331/0x660 [xfrm_user] ? rcu_read_lock_sched_held+0x91/0xc0 ? xfrm_user_state_lookup.constprop.39+0x320/0x320 [xfrm_user] ? find_held_lock+0x3a/0x1c0 ? mutex_lock_io_nested+0x1210/0x1210 ? sched_clock_cpu+0x18/0x170 netlink_rcv_skb+0x121/0x350 [ ... ] Fixes: 272c2330adc9 ("xfrm: bail early on slave pass over skb") Signed-off-by: Taehee Yoo Signed-off-by: David S. Miller --- drivers/net/ethernet/intel/ixgbevf/ipsec.c | 20 +++++++++++++------- 1 file changed, 13 insertions(+), 7 deletions(-) diff --git a/drivers/net/ethernet/intel/ixgbevf/ipsec.c b/drivers/net/ethernet/intel/ixgbevf/ipsec.c index caaea2c920a6e4..e3e4676af9e451 100644 --- a/drivers/net/ethernet/intel/ixgbevf/ipsec.c +++ b/drivers/net/ethernet/intel/ixgbevf/ipsec.c @@ -211,7 +211,7 @@ struct xfrm_state *ixgbevf_ipsec_find_rx_state(struct ixgbevf_ipsec *ipsec, static int ixgbevf_ipsec_parse_proto_keys(struct xfrm_state *xs, u32 *mykey, u32 *mysalt) { - struct net_device *dev = xs->xso.dev; + struct net_device *dev = xs->xso.real_dev; unsigned char *key_data; char *alg_name = NULL; int key_len; @@ -260,12 +260,15 @@ static int ixgbevf_ipsec_parse_proto_keys(struct xfrm_state *xs, **/ static int ixgbevf_ipsec_add_sa(struct xfrm_state *xs) { - struct net_device *dev = xs->xso.dev; - struct ixgbevf_adapter *adapter = netdev_priv(dev); - struct ixgbevf_ipsec *ipsec = adapter->ipsec; + struct net_device *dev = xs->xso.real_dev; + struct ixgbevf_adapter *adapter; + struct ixgbevf_ipsec *ipsec; u16 sa_idx; int ret; + adapter = netdev_priv(dev); + ipsec = adapter->ipsec; + if (xs->id.proto != IPPROTO_ESP && xs->id.proto != IPPROTO_AH) { netdev_err(dev, "Unsupported protocol 0x%04x for IPsec offload\n", xs->id.proto); @@ -383,11 +386,14 @@ static int ixgbevf_ipsec_add_sa(struct xfrm_state *xs) **/ static void ixgbevf_ipsec_del_sa(struct xfrm_state *xs) { - struct net_device *dev = xs->xso.dev; - struct ixgbevf_adapter *adapter = netdev_priv(dev); - struct ixgbevf_ipsec *ipsec = adapter->ipsec; + struct net_device *dev = xs->xso.real_dev; + struct ixgbevf_adapter *adapter; + struct ixgbevf_ipsec *ipsec; u16 sa_idx; + adapter = netdev_priv(dev); + ipsec = adapter->ipsec; + if (xs->xso.flags & XFRM_OFFLOAD_INBOUND) { sa_idx = xs->xso.offload_handle - IXGBE_IPSEC_BASE_RX_INDEX; From a22c39b831a081da9b2c488bd970a4412d926f30 Mon Sep 17 00:00:00 2001 From: Taehee Yoo Date: Mon, 5 Jul 2021 15:38:10 +0000 Subject: [PATCH 125/714] bonding: fix suspicious RCU usage in bond_ipsec_del_sa() To dereference bond->curr_active_slave, it uses rcu_dereference(). But it and the caller doesn't acquire RCU so a warning occurs. So add rcu_read_lock(). Test commands: ip netns add A ip netns exec A bash modprobe netdevsim echo "1 1" > /sys/bus/netdevsim/new_device ip link add bond0 type bond ip link set eth0 master bond0 ip link set eth0 up ip link set bond0 up ip x s add proto esp dst 14.1.1.1 src 15.1.1.1 spi 0x07 mode \ transport reqid 0x07 replay-window 32 aead 'rfc4106(gcm(aes))' \ 0x44434241343332312423222114131211f4f3f2f1 128 sel src 14.0.0.52/24 \ dst 14.0.0.70/24 proto tcp offload dev bond0 dir in ip x s f Splat looks like: ============================= WARNING: suspicious RCU usage 5.13.0-rc3+ #1168 Not tainted ----------------------------- drivers/net/bonding/bond_main.c:448 suspicious rcu_dereference_check() usage! other info that might help us debug this: rcu_scheduler_active = 2, debug_locks = 1 2 locks held by ip/705: #0: ffff888106701780 (&net->xfrm.xfrm_cfg_mutex){+.+.}-{3:3}, at: xfrm_netlink_rcv+0x59/0x80 [xfrm_user] #1: ffff8880075b0098 (&x->lock){+.-.}-{2:2}, at: xfrm_state_delete+0x16/0x30 stack backtrace: CPU: 6 PID: 705 Comm: ip Not tainted 5.13.0-rc3+ #1168 Call Trace: dump_stack+0xa4/0xe5 bond_ipsec_del_sa+0x16a/0x1c0 [bonding] __xfrm_state_delete+0x51f/0x730 xfrm_state_delete+0x1e/0x30 xfrm_state_flush+0x22f/0x390 xfrm_flush_sa+0xd8/0x260 [xfrm_user] ? xfrm_flush_policy+0x290/0x290 [xfrm_user] xfrm_user_rcv_msg+0x331/0x660 [xfrm_user] ? rcu_read_lock_sched_held+0x91/0xc0 ? xfrm_user_state_lookup.constprop.39+0x320/0x320 [xfrm_user] ? find_held_lock+0x3a/0x1c0 ? mutex_lock_io_nested+0x1210/0x1210 ? sched_clock_cpu+0x18/0x170 netlink_rcv_skb+0x121/0x350 [ ... ] Fixes: 18cb261afd7b ("bonding: support hardware encryption offload to slaves") Signed-off-by: Taehee Yoo Signed-off-by: David S. Miller --- drivers/net/bonding/bond_main.c | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/drivers/net/bonding/bond_main.c b/drivers/net/bonding/bond_main.c index 5466b24ceab6b5..aa9c469ebbb56a 100644 --- a/drivers/net/bonding/bond_main.c +++ b/drivers/net/bonding/bond_main.c @@ -444,21 +444,24 @@ static void bond_ipsec_del_sa(struct xfrm_state *xs) if (!bond_dev) return; + rcu_read_lock(); bond = netdev_priv(bond_dev); slave = rcu_dereference(bond->curr_active_slave); if (!slave) - return; + goto out; xs->xso.real_dev = slave->dev; if (!(slave->dev->xfrmdev_ops && slave->dev->xfrmdev_ops->xdo_dev_state_delete)) { slave_warn(bond_dev, slave->dev, "%s: no slave xdo_dev_state_delete\n", __func__); - return; + goto out; } slave->dev->xfrmdev_ops->xdo_dev_state_delete(xs); +out: + rcu_read_unlock(); } /** From b121693381b112b78c076dea171ee113e237c0e4 Mon Sep 17 00:00:00 2001 From: Taehee Yoo Date: Mon, 5 Jul 2021 15:38:11 +0000 Subject: [PATCH 126/714] bonding: disallow setting nested bonding + ipsec offload bonding interface can be nested and it supports ipsec offload. So, it allows setting the nested bonding + ipsec scenario. But code does not support this scenario. So, it should be disallowed. interface graph: bond2 | bond1 | eth0 The nested bonding + ipsec offload may not a real usecase. So, disallowing this scenario is fine. Fixes: 18cb261afd7b ("bonding: support hardware encryption offload to slaves") Signed-off-by: Taehee Yoo Signed-off-by: David S. Miller --- drivers/net/bonding/bond_main.c | 15 +++++++++------ 1 file changed, 9 insertions(+), 6 deletions(-) diff --git a/drivers/net/bonding/bond_main.c b/drivers/net/bonding/bond_main.c index aa9c469ebbb56a..f7b89743fab93e 100644 --- a/drivers/net/bonding/bond_main.c +++ b/drivers/net/bonding/bond_main.c @@ -419,8 +419,9 @@ static int bond_ipsec_add_sa(struct xfrm_state *xs) xs->xso.real_dev = slave->dev; bond->xs = xs; - if (!(slave->dev->xfrmdev_ops - && slave->dev->xfrmdev_ops->xdo_dev_state_add)) { + if (!slave->dev->xfrmdev_ops || + !slave->dev->xfrmdev_ops->xdo_dev_state_add || + netif_is_bond_master(slave->dev)) { slave_warn(bond_dev, slave->dev, "Slave does not support ipsec offload\n"); rcu_read_unlock(); return -EINVAL; @@ -453,8 +454,9 @@ static void bond_ipsec_del_sa(struct xfrm_state *xs) xs->xso.real_dev = slave->dev; - if (!(slave->dev->xfrmdev_ops - && slave->dev->xfrmdev_ops->xdo_dev_state_delete)) { + if (!slave->dev->xfrmdev_ops || + !slave->dev->xfrmdev_ops->xdo_dev_state_delete || + netif_is_bond_master(slave->dev)) { slave_warn(bond_dev, slave->dev, "%s: no slave xdo_dev_state_delete\n", __func__); goto out; } @@ -479,8 +481,9 @@ static bool bond_ipsec_offload_ok(struct sk_buff *skb, struct xfrm_state *xs) if (BOND_MODE(bond) != BOND_MODE_ACTIVEBACKUP) return true; - if (!(slave_dev->xfrmdev_ops - && slave_dev->xfrmdev_ops->xdo_dev_offload_ok)) { + if (!slave_dev->xfrmdev_ops || + !slave_dev->xfrmdev_ops->xdo_dev_offload_ok || + netif_is_bond_master(slave_dev)) { slave_warn(bond_dev, slave_dev, "%s: no slave xdo_dev_offload_ok\n", __func__); return false; } From 9a5605505d9c7dbfdb89cc29a8f5fc5cf9fd2334 Mon Sep 17 00:00:00 2001 From: Taehee Yoo Date: Mon, 5 Jul 2021 15:38:12 +0000 Subject: [PATCH 127/714] bonding: Add struct bond_ipesc to manage SA bonding has been supporting ipsec offload. When SA is added, bonding just passes SA to its own active real interface. But it doesn't manage SA. So, when events(add/del real interface, active real interface change, etc) occur, bonding can't handle that well because It doesn't manage SA. So some problems(panic, UAF, refcnt leak)occur. In order to make it stable, it should manage SA. That's the reason why struct bond_ipsec is added. When a new SA is added to bonding interface, it is stored in the bond_ipsec list. And the SA is passed to a current active real interface. If events occur, it uses bond_ipsec data to handle these events. bond->ipsec_list is protected by bond->ipsec_lock. If a current active real interface is changed, the following logic works. 1. delete all SAs from old active real interface 2. Add all SAs to the new active real interface. 3. If a new active real interface doesn't support ipsec offload or SA's option, it sets real_dev to NULL. Fixes: 18cb261afd7b ("bonding: support hardware encryption offload to slaves") Signed-off-by: Taehee Yoo Signed-off-by: David S. Miller --- drivers/net/bonding/bond_main.c | 139 +++++++++++++++++++++++++++----- include/net/bonding.h | 9 ++- 2 files changed, 127 insertions(+), 21 deletions(-) diff --git a/drivers/net/bonding/bond_main.c b/drivers/net/bonding/bond_main.c index f7b89743fab93e..165fa55cfb38a8 100644 --- a/drivers/net/bonding/bond_main.c +++ b/drivers/net/bonding/bond_main.c @@ -401,6 +401,7 @@ static int bond_vlan_rx_kill_vid(struct net_device *bond_dev, static int bond_ipsec_add_sa(struct xfrm_state *xs) { struct net_device *bond_dev = xs->xso.dev; + struct bond_ipsec *ipsec; struct bonding *bond; struct slave *slave; int err; @@ -416,9 +417,6 @@ static int bond_ipsec_add_sa(struct xfrm_state *xs) return -ENODEV; } - xs->xso.real_dev = slave->dev; - bond->xs = xs; - if (!slave->dev->xfrmdev_ops || !slave->dev->xfrmdev_ops->xdo_dev_state_add || netif_is_bond_master(slave->dev)) { @@ -427,11 +425,63 @@ static int bond_ipsec_add_sa(struct xfrm_state *xs) return -EINVAL; } + ipsec = kmalloc(sizeof(*ipsec), GFP_ATOMIC); + if (!ipsec) { + rcu_read_unlock(); + return -ENOMEM; + } + xs->xso.real_dev = slave->dev; + err = slave->dev->xfrmdev_ops->xdo_dev_state_add(xs); + if (!err) { + ipsec->xs = xs; + INIT_LIST_HEAD(&ipsec->list); + spin_lock_bh(&bond->ipsec_lock); + list_add(&ipsec->list, &bond->ipsec_list); + spin_unlock_bh(&bond->ipsec_lock); + } else { + kfree(ipsec); + } rcu_read_unlock(); return err; } +static void bond_ipsec_add_sa_all(struct bonding *bond) +{ + struct net_device *bond_dev = bond->dev; + struct bond_ipsec *ipsec; + struct slave *slave; + + rcu_read_lock(); + slave = rcu_dereference(bond->curr_active_slave); + if (!slave) + goto out; + + if (!slave->dev->xfrmdev_ops || + !slave->dev->xfrmdev_ops->xdo_dev_state_add || + netif_is_bond_master(slave->dev)) { + spin_lock_bh(&bond->ipsec_lock); + if (!list_empty(&bond->ipsec_list)) + slave_warn(bond_dev, slave->dev, + "%s: no slave xdo_dev_state_add\n", + __func__); + spin_unlock_bh(&bond->ipsec_lock); + goto out; + } + + spin_lock_bh(&bond->ipsec_lock); + list_for_each_entry(ipsec, &bond->ipsec_list, list) { + ipsec->xs->xso.real_dev = slave->dev; + if (slave->dev->xfrmdev_ops->xdo_dev_state_add(ipsec->xs)) { + slave_warn(bond_dev, slave->dev, "%s: failed to add SA\n", __func__); + ipsec->xs->xso.real_dev = NULL; + } + } + spin_unlock_bh(&bond->ipsec_lock); +out: + rcu_read_unlock(); +} + /** * bond_ipsec_del_sa - clear out this specific SA * @xs: pointer to transformer state struct @@ -439,6 +489,7 @@ static int bond_ipsec_add_sa(struct xfrm_state *xs) static void bond_ipsec_del_sa(struct xfrm_state *xs) { struct net_device *bond_dev = xs->xso.dev; + struct bond_ipsec *ipsec; struct bonding *bond; struct slave *slave; @@ -452,7 +503,10 @@ static void bond_ipsec_del_sa(struct xfrm_state *xs) if (!slave) goto out; - xs->xso.real_dev = slave->dev; + if (!xs->xso.real_dev) + goto out; + + WARN_ON(xs->xso.real_dev != slave->dev); if (!slave->dev->xfrmdev_ops || !slave->dev->xfrmdev_ops->xdo_dev_state_delete || @@ -463,6 +517,48 @@ static void bond_ipsec_del_sa(struct xfrm_state *xs) slave->dev->xfrmdev_ops->xdo_dev_state_delete(xs); out: + spin_lock_bh(&bond->ipsec_lock); + list_for_each_entry(ipsec, &bond->ipsec_list, list) { + if (ipsec->xs == xs) { + list_del(&ipsec->list); + kfree(ipsec); + break; + } + } + spin_unlock_bh(&bond->ipsec_lock); + rcu_read_unlock(); +} + +static void bond_ipsec_del_sa_all(struct bonding *bond) +{ + struct net_device *bond_dev = bond->dev; + struct bond_ipsec *ipsec; + struct slave *slave; + + rcu_read_lock(); + slave = rcu_dereference(bond->curr_active_slave); + if (!slave) { + rcu_read_unlock(); + return; + } + + spin_lock_bh(&bond->ipsec_lock); + list_for_each_entry(ipsec, &bond->ipsec_list, list) { + if (!ipsec->xs->xso.real_dev) + continue; + + if (!slave->dev->xfrmdev_ops || + !slave->dev->xfrmdev_ops->xdo_dev_state_delete || + netif_is_bond_master(slave->dev)) { + slave_warn(bond_dev, slave->dev, + "%s: no slave xdo_dev_state_delete\n", + __func__); + } else { + slave->dev->xfrmdev_ops->xdo_dev_state_delete(ipsec->xs); + } + ipsec->xs->xso.real_dev = NULL; + } + spin_unlock_bh(&bond->ipsec_lock); rcu_read_unlock(); } @@ -474,22 +570,27 @@ static void bond_ipsec_del_sa(struct xfrm_state *xs) static bool bond_ipsec_offload_ok(struct sk_buff *skb, struct xfrm_state *xs) { struct net_device *bond_dev = xs->xso.dev; - struct bonding *bond = netdev_priv(bond_dev); - struct slave *curr_active = rcu_dereference(bond->curr_active_slave); - struct net_device *slave_dev = curr_active->dev; + struct net_device *real_dev; + struct slave *curr_active; + struct bonding *bond; + + bond = netdev_priv(bond_dev); + curr_active = rcu_dereference(bond->curr_active_slave); + real_dev = curr_active->dev; if (BOND_MODE(bond) != BOND_MODE_ACTIVEBACKUP) return true; - if (!slave_dev->xfrmdev_ops || - !slave_dev->xfrmdev_ops->xdo_dev_offload_ok || - netif_is_bond_master(slave_dev)) { - slave_warn(bond_dev, slave_dev, "%s: no slave xdo_dev_offload_ok\n", __func__); + if (!xs->xso.real_dev) + return false; + + if (!real_dev->xfrmdev_ops || + !real_dev->xfrmdev_ops->xdo_dev_offload_ok || + netif_is_bond_master(real_dev)) { return false; } - xs->xso.real_dev = slave_dev; - return slave_dev->xfrmdev_ops->xdo_dev_offload_ok(skb, xs); + return real_dev->xfrmdev_ops->xdo_dev_offload_ok(skb, xs); } static const struct xfrmdev_ops bond_xfrmdev_ops = { @@ -1006,8 +1107,7 @@ void bond_change_active_slave(struct bonding *bond, struct slave *new_active) return; #ifdef CONFIG_XFRM_OFFLOAD - if (old_active && bond->xs) - bond_ipsec_del_sa(bond->xs); + bond_ipsec_del_sa_all(bond); #endif /* CONFIG_XFRM_OFFLOAD */ if (new_active) { @@ -1082,10 +1182,7 @@ void bond_change_active_slave(struct bonding *bond, struct slave *new_active) } #ifdef CONFIG_XFRM_OFFLOAD - if (new_active && bond->xs) { - xfrm_dev_state_flush(dev_net(bond->dev), bond->dev, true); - bond_ipsec_add_sa(bond->xs); - } + bond_ipsec_add_sa_all(bond); #endif /* CONFIG_XFRM_OFFLOAD */ /* resend IGMP joins since active slave has changed or @@ -3343,6 +3440,7 @@ static int bond_master_netdev_event(unsigned long event, return bond_event_changename(event_bond); case NETDEV_UNREGISTER: bond_remove_proc_entry(event_bond); + xfrm_dev_state_flush(dev_net(bond_dev), bond_dev, true); break; case NETDEV_REGISTER: bond_create_proc_entry(event_bond); @@ -4910,7 +5008,8 @@ void bond_setup(struct net_device *bond_dev) #ifdef CONFIG_XFRM_OFFLOAD /* set up xfrm device ops (only supported in active-backup right now) */ bond_dev->xfrmdev_ops = &bond_xfrmdev_ops; - bond->xs = NULL; + INIT_LIST_HEAD(&bond->ipsec_list); + spin_lock_init(&bond->ipsec_lock); #endif /* CONFIG_XFRM_OFFLOAD */ /* don't acquire bond device's netif_tx_lock when transmitting */ diff --git a/include/net/bonding.h b/include/net/bonding.h index 15335732e16607..625d9c72dee37d 100644 --- a/include/net/bonding.h +++ b/include/net/bonding.h @@ -201,6 +201,11 @@ struct bond_up_slave { */ #define BOND_LINK_NOCHANGE -1 +struct bond_ipsec { + struct list_head list; + struct xfrm_state *xs; +}; + /* * Here are the locking policies for the two bonding locks: * Get rcu_read_lock when reading or RTNL when writing slave list. @@ -249,7 +254,9 @@ struct bonding { #endif /* CONFIG_DEBUG_FS */ struct rtnl_link_stats64 bond_stats; #ifdef CONFIG_XFRM_OFFLOAD - struct xfrm_state *xs; + struct list_head ipsec_list; + /* protecting ipsec_list */ + spinlock_t ipsec_lock; #endif /* CONFIG_XFRM_OFFLOAD */ }; From 955b785ec6b3b2f9b91914d6eeac8ee66ee29239 Mon Sep 17 00:00:00 2001 From: Taehee Yoo Date: Mon, 5 Jul 2021 15:38:13 +0000 Subject: [PATCH 128/714] bonding: fix suspicious RCU usage in bond_ipsec_offload_ok() To dereference bond->curr_active_slave, it uses rcu_dereference(). But it and the caller doesn't acquire RCU so a warning occurs. So add rcu_read_lock(). Splat looks like: WARNING: suspicious RCU usage 5.13.0-rc6+ #1179 Not tainted drivers/net/bonding/bond_main.c:571 suspicious rcu_dereference_check() usage! other info that might help us debug this: rcu_scheduler_active = 2, debug_locks = 1 1 lock held by ping/974: #0: ffff888109e7db70 (sk_lock-AF_INET){+.+.}-{0:0}, at: raw_sendmsg+0x1303/0x2cb0 stack backtrace: CPU: 2 PID: 974 Comm: ping Not tainted 5.13.0-rc6+ #1179 Call Trace: dump_stack+0xa4/0xe5 bond_ipsec_offload_ok+0x1f4/0x260 [bonding] xfrm_output+0x179/0x890 xfrm4_output+0xfa/0x410 ? __xfrm4_output+0x4b0/0x4b0 ? __ip_make_skb+0xecc/0x2030 ? xfrm4_udp_encap_rcv+0x800/0x800 ? ip_local_out+0x21/0x3a0 ip_send_skb+0x37/0xa0 raw_sendmsg+0x1bfd/0x2cb0 Fixes: 18cb261afd7b ("bonding: support hardware encryption offload to slaves") Signed-off-by: Taehee Yoo Signed-off-by: David S. Miller --- drivers/net/bonding/bond_main.c | 22 ++++++++++++++++------ 1 file changed, 16 insertions(+), 6 deletions(-) diff --git a/drivers/net/bonding/bond_main.c b/drivers/net/bonding/bond_main.c index 165fa55cfb38a8..780f87869e36ef 100644 --- a/drivers/net/bonding/bond_main.c +++ b/drivers/net/bonding/bond_main.c @@ -573,24 +573,34 @@ static bool bond_ipsec_offload_ok(struct sk_buff *skb, struct xfrm_state *xs) struct net_device *real_dev; struct slave *curr_active; struct bonding *bond; + int err; bond = netdev_priv(bond_dev); + rcu_read_lock(); curr_active = rcu_dereference(bond->curr_active_slave); real_dev = curr_active->dev; - if (BOND_MODE(bond) != BOND_MODE_ACTIVEBACKUP) - return true; + if (BOND_MODE(bond) != BOND_MODE_ACTIVEBACKUP) { + err = true; + goto out; + } - if (!xs->xso.real_dev) - return false; + if (!xs->xso.real_dev) { + err = false; + goto out; + } if (!real_dev->xfrmdev_ops || !real_dev->xfrmdev_ops->xdo_dev_offload_ok || netif_is_bond_master(real_dev)) { - return false; + err = false; + goto out; } - return real_dev->xfrmdev_ops->xdo_dev_offload_ok(skb, xs); + err = real_dev->xfrmdev_ops->xdo_dev_offload_ok(skb, xs); +out: + rcu_read_unlock(); + return err; } static const struct xfrmdev_ops bond_xfrmdev_ops = { From 168e696a36792a4a3b2525a06249e7472ef90186 Mon Sep 17 00:00:00 2001 From: Taehee Yoo Date: Mon, 5 Jul 2021 15:38:14 +0000 Subject: [PATCH 129/714] bonding: fix incorrect return value of bond_ipsec_offload_ok() bond_ipsec_offload_ok() is called to check whether the interface supports ipsec offload or not. bonding interface support ipsec offload only in active-backup mode. So, if a bond interface is not in active-backup mode, it should return false but it returns true. Fixes: a3b658cfb664 ("bonding: allow xfrm offload setup post-module-load") Signed-off-by: Taehee Yoo Signed-off-by: David S. Miller --- drivers/net/bonding/bond_main.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/bonding/bond_main.c b/drivers/net/bonding/bond_main.c index 780f87869e36ef..d22d783033112f 100644 --- a/drivers/net/bonding/bond_main.c +++ b/drivers/net/bonding/bond_main.c @@ -581,7 +581,7 @@ static bool bond_ipsec_offload_ok(struct sk_buff *skb, struct xfrm_state *xs) real_dev = curr_active->dev; if (BOND_MODE(bond) != BOND_MODE_ACTIVEBACKUP) { - err = true; + err = false; goto out; } From 22b6d14992b733e9421a475f4d43df24629737ab Mon Sep 17 00:00:00 2001 From: "Paul E. McKenney" Date: Fri, 4 Jun 2021 12:37:44 -0700 Subject: [PATCH 130/714] scftorture: Avoid false-positive warnings in scftorture_invoker() If the call to set_cpus_allowed_ptr() in scftorture_invoker() fails, a later WARN_ONCE() complains. But with the advent of 570a752b7a9b ("lib/smp_processor_id: Use is_percpu_thread() instead of nr_cpus_allowed"), this complaint can be drowned out by complaints from smp_processor_id(). The rationale for this change is that scftorture's kthreads are not marked with PF_NO_SETAFFINITY, which means that a system administrator could change affinity at any time. However, scftorture is a torture test, and the system administrator might well have a valid test-the-test reason for changing affinity. This commit therefore changes to raw_smp_processor_id() in order to avoid the noise, and also adds a WARN_ON_ONCE() to the call to set_cpus_allowed_ptr() in order to directly detect immediate failure. There is no WARN_ON_ONCE() within the test loop, allowing human-reflex-based affinity resetting, if desired. Signed-off-by: Paul E. McKenney --- kernel/scftorture.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/kernel/scftorture.c b/kernel/scftorture.c index 2377cbb324742e..29e8fc5d91a7bc 100644 --- a/kernel/scftorture.c +++ b/kernel/scftorture.c @@ -405,15 +405,15 @@ static int scftorture_invoker(void *arg) VERBOSE_SCFTORTOUT("scftorture_invoker %d: task started", scfp->cpu); cpu = scfp->cpu % nr_cpu_ids; - set_cpus_allowed_ptr(current, cpumask_of(cpu)); + WARN_ON_ONCE(set_cpus_allowed_ptr(current, cpumask_of(cpu))); set_user_nice(current, MAX_NICE); if (holdoff) schedule_timeout_interruptible(holdoff * HZ); - VERBOSE_SCFTORTOUT("scftorture_invoker %d: Waiting for all SCF torturers from cpu %d", scfp->cpu, smp_processor_id()); + VERBOSE_SCFTORTOUT("scftorture_invoker %d: Waiting for all SCF torturers from cpu %d", scfp->cpu, raw_smp_processor_id()); // Make sure that the CPU is affinitized appropriately during testing. - curcpu = smp_processor_id(); + curcpu = raw_smp_processor_id(); WARN_ONCE(curcpu != scfp->cpu % nr_cpu_ids, "%s: Wanted CPU %d, running on %d, nr_cpu_ids = %d\n", __func__, scfp->cpu, curcpu, nr_cpu_ids); From 05bc276cf243d90b9f1eb6ae2962f41eeb53a741 Mon Sep 17 00:00:00 2001 From: "Paul E. McKenney" Date: Thu, 10 Jun 2021 09:24:43 -0700 Subject: [PATCH 131/714] refscale: Avoid false-positive warnings in ref_scale_reader() If the call to set_cpus_allowed_ptr() in ref_scale_reader() fails, a later WARN_ONCE() complains. But with the advent of 570a752b7a9b ("lib/smp_processor_id: Use is_percpu_thread() instead of nr_cpus_allowed"), this complaint can be drowned out by complaints from smp_processor_id(). The rationale for this change is that refscale's kthreads are not marked with PF_NO_SETAFFINITY, which means that a system administrator could change affinity at any time. However, refscale is a performance/stress test, and the system administrator might well have a valid test-the-test reason for changing affinity. This commit therefore changes to raw_smp_processor_id() in order to avoid the noise, and also adds a WARN_ON_ONCE() to the call to set_cpus_allowed_ptr() in order to directly detect immediate failure. There is no WARN_ON_ONCE() within the test loop, allowing human-reflex-based affinity resetting, if desired. Signed-off-by: Paul E. McKenney --- kernel/rcu/refscale.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/kernel/rcu/refscale.c b/kernel/rcu/refscale.c index 313d4547cbc7bf..d998a76fb5422e 100644 --- a/kernel/rcu/refscale.c +++ b/kernel/rcu/refscale.c @@ -487,13 +487,13 @@ ref_scale_reader(void *arg) s64 duration; VERBOSE_SCALEOUT_BATCH("ref_scale_reader %ld: task started", me); - set_cpus_allowed_ptr(current, cpumask_of(me % nr_cpu_ids)); + WARN_ON_ONCE(set_cpus_allowed_ptr(current, cpumask_of(me % nr_cpu_ids))); set_user_nice(current, MAX_NICE); atomic_inc(&n_init); if (holdoff) schedule_timeout_interruptible(holdoff * HZ); repeat: - VERBOSE_SCALEOUT_BATCH("ref_scale_reader %ld: waiting to start next experiment on cpu %d", me, smp_processor_id()); + VERBOSE_SCALEOUT_BATCH("ref_scale_reader %ld: waiting to start next experiment on cpu %d", me, raw_smp_processor_id()); // Wait for signal that this reader can start. wait_event(rt->wq, (atomic_read(&nreaders_exp) && smp_load_acquire(&rt->start_reader)) || @@ -503,7 +503,7 @@ ref_scale_reader(void *arg) goto end; // Make sure that the CPU is affinitized appropriately during testing. - WARN_ON_ONCE(smp_processor_id() != me); + WARN_ON_ONCE(raw_smp_processor_id() != me); WRITE_ONCE(rt->start_reader, 0); if (!atomic_dec_return(&n_started)) From 1d10bf55d85d34eb73dd8263635f43fd72135d2d Mon Sep 17 00:00:00 2001 From: "Paul E. McKenney" Date: Tue, 25 May 2021 10:12:45 -0700 Subject: [PATCH 132/714] rcu-tasks: Don't delete holdouts within trc_inspect_reader() As Yanfei pointed out, although invoking trc_del_holdout() is safe from the viewpoint of the integrity of the holdout list itself, the put_task_struct() invoked by trc_del_holdout() can result in use-after-free errors due to later accesses to this task_struct structure by the RCU Tasks Trace grace-period kthread. This commit therefore removes this call to trc_del_holdout() from trc_inspect_reader() in favor of the grace-period thread's existing call to trc_del_holdout(), thus eliminating that particular class of use-after-free errors. Reported-by: "Xu, Yanfei" Signed-off-by: Paul E. McKenney --- kernel/rcu/tasks.h | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/kernel/rcu/tasks.h b/kernel/rcu/tasks.h index 03a118d1c00392..3d5cb6cb8a6dba 100644 --- a/kernel/rcu/tasks.h +++ b/kernel/rcu/tasks.h @@ -953,10 +953,9 @@ static bool trc_inspect_reader(struct task_struct *t, void *arg) in_qs = likely(!t->trc_reader_nesting); } - // Mark as checked. Because this is called from the grace-period - // kthread, also remove the task from the holdout list. + // Mark as checked so that the grace-period kthread will + // remove it from the holdout list. t->trc_reader_checked = true; - trc_del_holdout(t); if (in_qs) return true; // Already in quiescent state, done!!! From ad1f37970875eef98eeaf478f55045f388b794a5 Mon Sep 17 00:00:00 2001 From: Colin Ian King Date: Tue, 6 Jul 2021 12:18:02 +0100 Subject: [PATCH 133/714] octeontx2-pf: Fix assigned error return value that is never used Currently when the call to otx2_mbox_alloc_msg_cgx_mac_addr_update fails the error return variable rc is being assigned -ENOMEM and does not return early. rc is then re-assigned and the error case is not handled correctly. Fix this by returning -ENOMEM rather than assigning rc. Addresses-Coverity: ("Unused value") Fixes: 79d2be385e9e ("octeontx2-pf: offload DMAC filters to CGX/RPM block") Signed-off-by: Colin Ian King Signed-off-by: David S. Miller --- drivers/net/ethernet/marvell/octeontx2/nic/otx2_dmac_flt.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/ethernet/marvell/octeontx2/nic/otx2_dmac_flt.c b/drivers/net/ethernet/marvell/octeontx2/nic/otx2_dmac_flt.c index ffe3e94562d0b9..383a6b5cb698b2 100644 --- a/drivers/net/ethernet/marvell/octeontx2/nic/otx2_dmac_flt.c +++ b/drivers/net/ethernet/marvell/octeontx2/nic/otx2_dmac_flt.c @@ -161,7 +161,7 @@ int otx2_dmacflt_update(struct otx2_nic *pf, u8 *mac, u8 bit_pos) if (!req) { mutex_unlock(&pf->mbox.lock); - rc = -ENOMEM; + return -ENOMEM; } ether_addr_copy(req->mac_addr, mac); From ccd27f05ae7b8ebc40af5b004e94517a919aa862 Mon Sep 17 00:00:00 2001 From: Nicolas Dichtel Date: Tue, 6 Jul 2021 11:13:35 +0200 Subject: [PATCH 134/714] ipv6: fix 'disable_policy' for fwd packets The goal of commit df789fe75206 ("ipv6: Provide ipv6 version of "disable_policy" sysctl") was to have the disable_policy from ipv4 available on ipv6. However, it's not exactly the same mechanism. On IPv4, all packets coming from an interface, which has disable_policy set, bypass the policy check. For ipv6, this is done only for local packets, ie for packets destinated to an address configured on the incoming interface. Let's align ipv6 with ipv4 so that the 'disable_policy' sysctl has the same effect for both protocols. My first approach was to create a new kind of route cache entries, to be able to set DST_NOPOLICY without modifying routes. This would have added a lot of code. Because the local delivery path is already handled, I choose to focus on the forwarding path to minimize code churn. Fixes: df789fe75206 ("ipv6: Provide ipv6 version of "disable_policy" sysctl") Signed-off-by: Nicolas Dichtel Signed-off-by: David S. Miller --- net/ipv6/ip6_output.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/net/ipv6/ip6_output.c b/net/ipv6/ip6_output.c index 984050f35c61fa..d4ee2169afd816 100644 --- a/net/ipv6/ip6_output.c +++ b/net/ipv6/ip6_output.c @@ -479,7 +479,9 @@ int ip6_forward(struct sk_buff *skb) if (skb_warn_if_lro(skb)) goto drop; - if (!xfrm6_policy_check(NULL, XFRM_POLICY_FWD, skb)) { + if (!net->ipv6.devconf_all->disable_policy && + !idev->cnf.disable_policy && + !xfrm6_policy_check(NULL, XFRM_POLICY_FWD, skb)) { __IP6_INC_STATS(net, idev, IPSTATS_MIB_INDISCARDS); goto drop; } From a9ab9cce9367a2cc02a3c7eb57a004dc0b8f380d Mon Sep 17 00:00:00 2001 From: "Paul E. McKenney" Date: Tue, 25 May 2021 11:28:40 -0700 Subject: [PATCH 135/714] rcu-tasks: Don't delete holdouts within trc_wait_for_one_reader() Invoking trc_del_holdout() from within trc_wait_for_one_reader() is only a performance optimization because the RCU Tasks Trace grace-period kthread will eventually do this within check_all_holdout_tasks_trace(). But it is not a particularly important performance optimization because it only applies to the grace-period kthread, of which there is but one. This commit therefore removes this invocation of trc_del_holdout() in favor of the one in check_all_holdout_tasks_trace() in the grace-period kthread. Reported-by: "Xu, Yanfei" Signed-off-by: Paul E. McKenney --- kernel/rcu/tasks.h | 1 - 1 file changed, 1 deletion(-) diff --git a/kernel/rcu/tasks.h b/kernel/rcu/tasks.h index 3d5cb6cb8a6dba..8536c55df51426 100644 --- a/kernel/rcu/tasks.h +++ b/kernel/rcu/tasks.h @@ -982,7 +982,6 @@ static void trc_wait_for_one_reader(struct task_struct *t, // The current task had better be in a quiescent state. if (t == current) { t->trc_reader_checked = true; - trc_del_holdout(t); WARN_ON_ONCE(t->trc_reader_nesting); return; } From 2a2ed5618a0e8a890d948b88b368c0459f35136c Mon Sep 17 00:00:00 2001 From: "Paul E. McKenney" Date: Sun, 4 Jul 2021 13:59:35 -0700 Subject: [PATCH 136/714] rcu: Fix pr_info() formats and values in show_rcu_gp_kthreads() This commit changes from "%lx" to "%x" and from "0x1ffffL" to "0x1ffff" to match the change in type between the old field ->state (unsigned long) and the new field ->__state (unsigned int). Signed-off-by: Paul E. McKenney --- kernel/rcu/tree_stall.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/kernel/rcu/tree_stall.h b/kernel/rcu/tree_stall.h index 3f937b20814fdc..6c76988cc019f6 100644 --- a/kernel/rcu/tree_stall.h +++ b/kernel/rcu/tree_stall.h @@ -795,9 +795,9 @@ void show_rcu_gp_kthreads(void) jr = j - data_race(rcu_state.gp_req_activity); js = j - data_race(rcu_state.gp_start); jw = j - data_race(rcu_state.gp_wake_time); - pr_info("%s: wait state: %s(%d) ->state: %#lx ->rt_priority %u delta ->gp_start %lu ->gp_activity %lu ->gp_req_activity %lu ->gp_wake_time %lu ->gp_wake_seq %ld ->gp_seq %ld ->gp_seq_needed %ld ->gp_max %lu ->gp_flags %#x\n", + pr_info("%s: wait state: %s(%d) ->state: %#x ->rt_priority %u delta ->gp_start %lu ->gp_activity %lu ->gp_req_activity %lu ->gp_wake_time %lu ->gp_wake_seq %ld ->gp_seq %ld ->gp_seq_needed %ld ->gp_max %lu ->gp_flags %#x\n", rcu_state.name, gp_state_getname(rcu_state.gp_state), - rcu_state.gp_state, t ? t->__state : 0x1ffffL, t ? t->rt_priority : 0xffU, + rcu_state.gp_state, t ? t->__state : 0x1ffff, t ? t->rt_priority : 0xffU, js, ja, jr, jw, (long)data_race(rcu_state.gp_wake_seq), (long)data_race(rcu_state.gp_seq), (long)data_race(rcu_get_root()->gp_seq_needed), From af0efa050caa66e8f304c42c94c76cb6c480cb7e Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Toke=20H=C3=B8iland-J=C3=B8rgensen?= Date: Tue, 6 Jul 2021 14:23:55 +0200 Subject: [PATCH 137/714] libbpf: Restore errno return for functions that were already returning it MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The update to streamline libbpf error reporting intended to change all functions to return the errno as a negative return value if LIBBPF_STRICT_DIRECT_ERRS is set. However, if the flag is *not* set, the return value changes for the two functions that were already returning a negative errno unconditionally: bpf_link__unpin() and perf_buffer__poll(). This is a user-visible API change that breaks applications; so let's revert these two functions back to unconditionally returning a negative errno value. Fixes: e9fc3ce99b34 ("libbpf: Streamline error reporting for high-level APIs") Signed-off-by: Toke Høiland-Jørgensen Signed-off-by: Andrii Nakryiko Link: https://lore.kernel.org/bpf/20210706122355.236082-1-toke@redhat.com --- tools/lib/bpf/libbpf.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tools/lib/bpf/libbpf.c b/tools/lib/bpf/libbpf.c index 1e04ce724240f5..6f5e2757bb3cfa 100644 --- a/tools/lib/bpf/libbpf.c +++ b/tools/lib/bpf/libbpf.c @@ -10136,7 +10136,7 @@ int bpf_link__unpin(struct bpf_link *link) err = unlink(link->pin_path); if (err != 0) - return libbpf_err_errno(err); + return -errno; pr_debug("link fd=%d: unpinned from %s\n", link->fd, link->pin_path); zfree(&link->pin_path); @@ -11197,7 +11197,7 @@ int perf_buffer__poll(struct perf_buffer *pb, int timeout_ms) cnt = epoll_wait(pb->epoll_fd, pb->events, pb->cpu_cnt, timeout_ms); if (cnt < 0) - return libbpf_err_errno(cnt); + return -errno; for (i = 0; i < cnt; i++) { struct perf_cpu_buf *cpu_buf = pb->events[i].data.ptr; From c9cd752d8f3a6b13afc5332a60bea3e68f141738 Mon Sep 17 00:00:00 2001 From: Ulf Hansson Date: Mon, 5 Jul 2021 15:34:41 +0200 Subject: [PATCH 138/714] regulator: fixed: Mark regulator-fixed-domain as deprecated A power domain should not be modelled as a regulator, not even for the simplest case as recent discussions have concluded around the existing regulator-fixed-domain DT binding. Fortunately, there is only one user of the binding that was recently added. Therefore, let's mark the binding as deprecated to prevent it from being further used. Signed-off-by: Ulf Hansson Link: https://lore.kernel.org/r/20210705133441.11344-1-ulf.hansson@linaro.org Signed-off-by: Mark Brown --- .../devicetree/bindings/regulator/fixed-regulator.yaml | 2 ++ 1 file changed, 2 insertions(+) diff --git a/Documentation/devicetree/bindings/regulator/fixed-regulator.yaml b/Documentation/devicetree/bindings/regulator/fixed-regulator.yaml index 8850c01bd47060..9b131c6facbc00 100644 --- a/Documentation/devicetree/bindings/regulator/fixed-regulator.yaml +++ b/Documentation/devicetree/bindings/regulator/fixed-regulator.yaml @@ -57,12 +57,14 @@ properties: maxItems: 1 power-domains: + deprecated: true description: Power domain to use for enable control. This binding is only available if the compatible is chosen to regulator-fixed-domain. maxItems: 1 required-opps: + deprecated: true description: Performance state to use for enable control. This binding is only available if the compatible is chosen to regulator-fixed-domain. The From ea986908ccfcc53204a03bb0841227e1b26578c4 Mon Sep 17 00:00:00 2001 From: Axel Lin Date: Fri, 2 Jul 2021 22:21:40 +0800 Subject: [PATCH 139/714] regulator: mtk-dvfsrc: Fix wrong dev pointer for devm_regulator_register If use dev->parent, the regulator_unregister will not be called when this driver is unloaded. Fix it by using dev instead. Signed-off-by: Axel Lin Link: https://lore.kernel.org/r/20210702142140.2678130-1-axel.lin@ingics.com Signed-off-by: Mark Brown --- drivers/regulator/mtk-dvfsrc-regulator.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/drivers/regulator/mtk-dvfsrc-regulator.c b/drivers/regulator/mtk-dvfsrc-regulator.c index d3d876198d6ece..234af3a66c77df 100644 --- a/drivers/regulator/mtk-dvfsrc-regulator.c +++ b/drivers/regulator/mtk-dvfsrc-regulator.c @@ -179,8 +179,7 @@ static int dvfsrc_vcore_regulator_probe(struct platform_device *pdev) for (i = 0; i < regulator_init_data->size; i++) { config.dev = dev->parent; config.driver_data = (mt_regulators + i); - rdev = devm_regulator_register(dev->parent, - &(mt_regulators + i)->desc, + rdev = devm_regulator_register(dev, &(mt_regulators + i)->desc, &config); if (IS_ERR(rdev)) { dev_err(dev, "failed to register %s\n", From 135cbd378eab336da15de9c84bbb22bf743b38a5 Mon Sep 17 00:00:00 2001 From: Marek Vasut Date: Sat, 3 Jul 2021 04:23:00 +0200 Subject: [PATCH 140/714] spi: imx: mx51-ecspi: Reinstate low-speed CONFIGREG delay MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Since 00b80ac935539 ("spi: imx: mx51-ecspi: Move some initialisation to prepare_message hook."), the MX51_ECSPI_CONFIG write no longer happens in prepare_transfer hook, but rather in prepare_message hook, however the MX51_ECSPI_CONFIG delay is still left in prepare_transfer hook and thus has no effect. This leads to low bus frequency operation problems described in 6fd8b8503a0dc ("spi: spi-imx: Fix out-of-order CS/SCLK operation at low speeds") again. Move the MX51_ECSPI_CONFIG write delay into the prepare_message hook as well, thus reinstating the low bus frequency fix. Fixes: 00b80ac935539 ("spi: imx: mx51-ecspi: Move some initialisation to prepare_message hook.") Signed-off-by: Marek Vasut Cc: Uwe Kleine-König Cc: Mark Brown Link: https://lore.kernel.org/r/20210703022300.296114-1-marex@denx.de Signed-off-by: Mark Brown --- drivers/spi/spi-imx.c | 38 +++++++++++++++++++------------------- 1 file changed, 19 insertions(+), 19 deletions(-) diff --git a/drivers/spi/spi-imx.c b/drivers/spi/spi-imx.c index 39dc02e366f4b4..4aee3db6d6df0a 100644 --- a/drivers/spi/spi-imx.c +++ b/drivers/spi/spi-imx.c @@ -506,7 +506,7 @@ static int mx51_ecspi_prepare_message(struct spi_imx_data *spi_imx, { struct spi_device *spi = msg->spi; u32 ctrl = MX51_ECSPI_CTRL_ENABLE; - u32 testreg; + u32 testreg, delay; u32 cfg = readl(spi_imx->base + MX51_ECSPI_CONFIG); /* set Master or Slave mode */ @@ -567,6 +567,23 @@ static int mx51_ecspi_prepare_message(struct spi_imx_data *spi_imx, writel(cfg, spi_imx->base + MX51_ECSPI_CONFIG); + /* + * Wait until the changes in the configuration register CONFIGREG + * propagate into the hardware. It takes exactly one tick of the + * SCLK clock, but we will wait two SCLK clock just to be sure. The + * effect of the delay it takes for the hardware to apply changes + * is noticable if the SCLK clock run very slow. In such a case, if + * the polarity of SCLK should be inverted, the GPIO ChipSelect might + * be asserted before the SCLK polarity changes, which would disrupt + * the SPI communication as the device on the other end would consider + * the change of SCLK polarity as a clock tick already. + */ + delay = (2 * 1000000) / spi_imx->spi_bus_clk; + if (likely(delay < 10)) /* SCLK is faster than 100 kHz */ + udelay(delay); + else /* SCLK is _very_ slow */ + usleep_range(delay, delay + 10); + return 0; } @@ -574,7 +591,7 @@ static int mx51_ecspi_prepare_transfer(struct spi_imx_data *spi_imx, struct spi_device *spi) { u32 ctrl = readl(spi_imx->base + MX51_ECSPI_CTRL); - u32 clk, delay; + u32 clk; /* Clear BL field and set the right value */ ctrl &= ~MX51_ECSPI_CTRL_BL_MASK; @@ -596,23 +613,6 @@ static int mx51_ecspi_prepare_transfer(struct spi_imx_data *spi_imx, writel(ctrl, spi_imx->base + MX51_ECSPI_CTRL); - /* - * Wait until the changes in the configuration register CONFIGREG - * propagate into the hardware. It takes exactly one tick of the - * SCLK clock, but we will wait two SCLK clock just to be sure. The - * effect of the delay it takes for the hardware to apply changes - * is noticable if the SCLK clock run very slow. In such a case, if - * the polarity of SCLK should be inverted, the GPIO ChipSelect might - * be asserted before the SCLK polarity changes, which would disrupt - * the SPI communication as the device on the other end would consider - * the change of SCLK polarity as a clock tick already. - */ - delay = (2 * 1000000) / clk; - if (likely(delay < 10)) /* SCLK is faster than 100 kHz */ - udelay(delay); - else /* SCLK is _very_ slow */ - usleep_range(delay, delay + 10); - return 0; } From d322957ebfb9c21c2c72b66680f7c3ccd724e081 Mon Sep 17 00:00:00 2001 From: Duncan Roe Date: Wed, 7 Jul 2021 10:57:51 +1000 Subject: [PATCH 141/714] netfilter: uapi: refer to nfnetlink_conntrack.h, not nf_conntrack_netlink.h nf_conntrack_netlink.h does not exist, refer to nfnetlink_conntrack.h instead. Signed-off-by: Duncan Roe Signed-off-by: Pablo Neira Ayuso --- include/uapi/linux/netfilter/nfnetlink_log.h | 2 +- include/uapi/linux/netfilter/nfnetlink_queue.h | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/include/uapi/linux/netfilter/nfnetlink_log.h b/include/uapi/linux/netfilter/nfnetlink_log.h index 45c8d3b027e027..0af9c113d6652b 100644 --- a/include/uapi/linux/netfilter/nfnetlink_log.h +++ b/include/uapi/linux/netfilter/nfnetlink_log.h @@ -61,7 +61,7 @@ enum nfulnl_attr_type { NFULA_HWTYPE, /* hardware type */ NFULA_HWHEADER, /* hardware header */ NFULA_HWLEN, /* hardware header length */ - NFULA_CT, /* nf_conntrack_netlink.h */ + NFULA_CT, /* nfnetlink_conntrack.h */ NFULA_CT_INFO, /* enum ip_conntrack_info */ NFULA_VLAN, /* nested attribute: packet vlan info */ NFULA_L2HDR, /* full L2 header */ diff --git a/include/uapi/linux/netfilter/nfnetlink_queue.h b/include/uapi/linux/netfilter/nfnetlink_queue.h index bcb2cb5d40b9f9..aed90c4df0c8cf 100644 --- a/include/uapi/linux/netfilter/nfnetlink_queue.h +++ b/include/uapi/linux/netfilter/nfnetlink_queue.h @@ -51,11 +51,11 @@ enum nfqnl_attr_type { NFQA_IFINDEX_PHYSOUTDEV, /* __u32 ifindex */ NFQA_HWADDR, /* nfqnl_msg_packet_hw */ NFQA_PAYLOAD, /* opaque data payload */ - NFQA_CT, /* nf_conntrack_netlink.h */ + NFQA_CT, /* nfnetlink_conntrack.h */ NFQA_CT_INFO, /* enum ip_conntrack_info */ NFQA_CAP_LEN, /* __u32 length of captured packet */ NFQA_SKB_INFO, /* __u32 skb meta information */ - NFQA_EXP, /* nf_conntrack_netlink.h */ + NFQA_EXP, /* nfnetlink_conntrack.h */ NFQA_UID, /* __u32 sk uid */ NFQA_GID, /* __u32 sk gid */ NFQA_SECCTX, /* security context string */ From 54afaae34ee49e98c1c902b444b42832551d090c Mon Sep 17 00:00:00 2001 From: David Sterba Date: Wed, 23 Jun 2021 17:54:54 +0200 Subject: [PATCH 142/714] btrfs: zoned: fix types for u64 division in btrfs_reclaim_bgs_work The types in calculation of the used percentage in the reclaiming messages are both u64, though bg->length is either 1GiB (non-zoned) or the zone size in the zoned mode. The upper limit on zone size is 8GiB so this could theoretically overflow in the future, right now the values fit. Fixes: 18bb8bbf13c1 ("btrfs: zoned: automatically reclaim zones") CC: stable@vger.kernel.org # 5.13 Reviewed-by: Johannes Thumshirn Signed-off-by: David Sterba --- fs/btrfs/block-group.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/btrfs/block-group.c b/fs/btrfs/block-group.c index 38b127b9edfc92..d4c8dc55088933 100644 --- a/fs/btrfs/block-group.c +++ b/fs/btrfs/block-group.c @@ -1540,7 +1540,7 @@ void btrfs_reclaim_bgs_work(struct work_struct *work) goto next; btrfs_info(fs_info, "reclaiming chunk %llu with %llu%% used", - bg->start, div_u64(bg->used * 100, bg->length)); + bg->start, div64_u64(bg->used * 100, bg->length)); trace_btrfs_reclaim_block_group(bg); ret = btrfs_relocate_chunk(fs_info, bg->start); if (ret) From 5f93e776c6734cea989aeb4f2d6c97e521baa683 Mon Sep 17 00:00:00 2001 From: Johannes Thumshirn Date: Tue, 29 Jun 2021 03:16:46 +0900 Subject: [PATCH 143/714] btrfs: zoned: print unusable percentage when reclaiming block groups When we're automatically reclaiming a zone, because its zone_unusable value is above the reclaim threshold, we're only logging how much percent of the zone's capacity are used, but not how much of the capacity is unusable. Also print the percentage of the unusable space in the block group before we're reclaiming it. Example: BTRFS info (device sdg): reclaiming chunk 230686720 with 13% used 86% unusable CC: stable@vger.kernel.org # 5.13 Signed-off-by: Johannes Thumshirn Reviewed-by: David Sterba Signed-off-by: David Sterba --- fs/btrfs/block-group.c | 14 ++++++++++++-- 1 file changed, 12 insertions(+), 2 deletions(-) diff --git a/fs/btrfs/block-group.c b/fs/btrfs/block-group.c index d4c8dc55088933..fec7a34b27f3e2 100644 --- a/fs/btrfs/block-group.c +++ b/fs/btrfs/block-group.c @@ -1501,6 +1501,7 @@ void btrfs_reclaim_bgs_work(struct work_struct *work) mutex_lock(&fs_info->reclaim_bgs_lock); spin_lock(&fs_info->unused_bgs_lock); while (!list_empty(&fs_info->reclaim_bgs)) { + u64 zone_unusable; int ret = 0; bg = list_first_entry(&fs_info->reclaim_bgs, @@ -1534,13 +1535,22 @@ void btrfs_reclaim_bgs_work(struct work_struct *work) goto next; } + /* + * Cache the zone_unusable value before turning the block group + * to read only. As soon as the blog group is read only it's + * zone_unusable value gets moved to the block group's read-only + * bytes and isn't available for calculations anymore. + */ + zone_unusable = bg->zone_unusable; ret = inc_block_group_ro(bg, 0); up_write(&space_info->groups_sem); if (ret < 0) goto next; - btrfs_info(fs_info, "reclaiming chunk %llu with %llu%% used", - bg->start, div64_u64(bg->used * 100, bg->length)); + btrfs_info(fs_info, + "reclaiming chunk %llu with %llu%% used %llu%% unusable", + bg->start, div_u64(bg->used * 100, bg->length), + div64_u64(zone_unusable * 100, bg->length)); trace_btrfs_reclaim_block_group(bg); ret = btrfs_relocate_chunk(fs_info, bg->start); if (ret) From 1cb3db1cf383a3c7dbda1aa0ce748b0958759947 Mon Sep 17 00:00:00 2001 From: Filipe Manana Date: Tue, 29 Jun 2021 14:43:05 +0100 Subject: [PATCH 144/714] btrfs: fix deadlock with concurrent chunk allocations involving system chunks When a task attempting to allocate a new chunk verifies that there is not currently enough free space in the system space_info and there is another task that allocated a new system chunk but it did not finish yet the creation of the respective block group, it waits for that other task to finish creating the block group. This is to avoid exhaustion of the system chunk array in the superblock, which is limited, when we have a thundering herd of tasks allocating new chunks. This problem was described and fixed by commit eafa4fd0ad0607 ("btrfs: fix exhaustion of the system chunk array due to concurrent allocations"). However there are two very similar scenarios where this can lead to a deadlock: 1) Task B allocated a new system chunk and task A is waiting on task B to finish creation of the respective system block group. However before task B ends its transaction handle and finishes the creation of the system block group, it attempts to allocate another chunk (like a data chunk for an fallocate operation for a very large range). Task B will be unable to progress and allocate the new chunk, because task A set space_info->chunk_alloc to 1 and therefore it loops at btrfs_chunk_alloc() waiting for task A to finish its chunk allocation and set space_info->chunk_alloc to 0, but task A is waiting on task B to finish creation of the new system block group, therefore resulting in a deadlock; 2) Task B allocated a new system chunk and task A is waiting on task B to finish creation of the respective system block group. By the time that task B enter the final phase of block group allocation, which happens at btrfs_create_pending_block_groups(), when it modifies the extent tree, the device tree or the chunk tree to insert the items for some new block group, it needs to allocate a new chunk, so it ends up at btrfs_chunk_alloc() and keeps looping there because task A has set space_info->chunk_alloc to 1, but task A is waiting for task B to finish creation of the new system block group and release the reserved system space, therefore resulting in a deadlock. In short, the problem is if a task B needs to allocate a new chunk after it previously allocated a new system chunk and if another task A is currently waiting for task B to complete the allocation of the new system chunk. Unfortunately this deadlock scenario introduced by the previous fix for the system chunk array exhaustion problem does not have a simple and short fix, and requires a big change to rework the chunk allocation code so that chunk btree updates are all made in the first phase of chunk allocation. And since this deadlock regression is being frequently hit on zoned filesystems and the system chunk array exhaustion problem is triggered in more extreme cases (originally observed on PowerPC with a node size of 64K when running the fallocate tests from stress-ng), revert the changes from that commit. The next patch in the series, with a subject of "btrfs: rework chunk allocation to avoid exhaustion of the system chunk array" does the necessary changes to fix the system chunk array exhaustion problem. Reported-by: Naohiro Aota Link: https://lore.kernel.org/linux-btrfs/20210621015922.ewgbffxuawia7liz@naota-xeon/ Fixes: eafa4fd0ad0607 ("btrfs: fix exhaustion of the system chunk array due to concurrent allocations") CC: stable@vger.kernel.org # 5.12+ Tested-by: Shin'ichiro Kawasaki Tested-by: Naohiro Aota Signed-off-by: Filipe Manana Tested-by: David Sterba Signed-off-by: David Sterba --- fs/btrfs/block-group.c | 58 +----------------------------------------- fs/btrfs/transaction.c | 5 ---- fs/btrfs/transaction.h | 7 ----- 3 files changed, 1 insertion(+), 69 deletions(-) diff --git a/fs/btrfs/block-group.c b/fs/btrfs/block-group.c index fec7a34b27f3e2..a26209f9827915 100644 --- a/fs/btrfs/block-group.c +++ b/fs/btrfs/block-group.c @@ -3377,7 +3377,6 @@ static u64 get_profile_num_devs(struct btrfs_fs_info *fs_info, u64 type) */ void check_system_chunk(struct btrfs_trans_handle *trans, u64 type) { - struct btrfs_transaction *cur_trans = trans->transaction; struct btrfs_fs_info *fs_info = trans->fs_info; struct btrfs_space_info *info; u64 left; @@ -3392,7 +3391,6 @@ void check_system_chunk(struct btrfs_trans_handle *trans, u64 type) lockdep_assert_held(&fs_info->chunk_mutex); info = btrfs_find_space_info(fs_info, BTRFS_BLOCK_GROUP_SYSTEM); -again: spin_lock(&info->lock); left = info->total_bytes - btrfs_space_info_used(info, true); spin_unlock(&info->lock); @@ -3411,58 +3409,6 @@ void check_system_chunk(struct btrfs_trans_handle *trans, u64 type) if (left < thresh) { u64 flags = btrfs_system_alloc_profile(fs_info); - u64 reserved = atomic64_read(&cur_trans->chunk_bytes_reserved); - - /* - * If there's not available space for the chunk tree (system - * space) and there are other tasks that reserved space for - * creating a new system block group, wait for them to complete - * the creation of their system block group and release excess - * reserved space. We do this because: - * - * *) We can end up allocating more system chunks than necessary - * when there are multiple tasks that are concurrently - * allocating block groups, which can lead to exhaustion of - * the system array in the superblock; - * - * *) If we allocate extra and unnecessary system block groups, - * despite being empty for a long time, and possibly forever, - * they end not being added to the list of unused block groups - * because that typically happens only when deallocating the - * last extent from a block group - which never happens since - * we never allocate from them in the first place. The few - * exceptions are when mounting a filesystem or running scrub, - * which add unused block groups to the list of unused block - * groups, to be deleted by the cleaner kthread. - * And even when they are added to the list of unused block - * groups, it can take a long time until they get deleted, - * since the cleaner kthread might be sleeping or busy with - * other work (deleting subvolumes, running delayed iputs, - * defrag scheduling, etc); - * - * This is rare in practice, but can happen when too many tasks - * are allocating blocks groups in parallel (via fallocate()) - * and before the one that reserved space for a new system block - * group finishes the block group creation and releases the space - * reserved in excess (at btrfs_create_pending_block_groups()), - * other tasks end up here and see free system space temporarily - * not enough for updating the chunk tree. - * - * We unlock the chunk mutex before waiting for such tasks and - * lock it again after the wait, otherwise we would deadlock. - * It is safe to do so because allocating a system chunk is the - * first thing done while allocating a new block group. - */ - if (reserved > trans->chunk_bytes_reserved) { - const u64 min_needed = reserved - thresh; - - mutex_unlock(&fs_info->chunk_mutex); - wait_event(cur_trans->chunk_reserve_wait, - atomic64_read(&cur_trans->chunk_bytes_reserved) <= - min_needed); - mutex_lock(&fs_info->chunk_mutex); - goto again; - } /* * Ignore failure to create system chunk. We might end up not @@ -3477,10 +3423,8 @@ void check_system_chunk(struct btrfs_trans_handle *trans, u64 type) ret = btrfs_block_rsv_add(fs_info->chunk_root, &fs_info->chunk_block_rsv, thresh, BTRFS_RESERVE_NO_FLUSH); - if (!ret) { - atomic64_add(thresh, &cur_trans->chunk_bytes_reserved); + if (!ret) trans->chunk_bytes_reserved += thresh; - } } } diff --git a/fs/btrfs/transaction.c b/fs/btrfs/transaction.c index 50318231c1a881..443c348bc6f3f5 100644 --- a/fs/btrfs/transaction.c +++ b/fs/btrfs/transaction.c @@ -260,7 +260,6 @@ static inline int extwriter_counter_read(struct btrfs_transaction *trans) void btrfs_trans_release_chunk_metadata(struct btrfs_trans_handle *trans) { struct btrfs_fs_info *fs_info = trans->fs_info; - struct btrfs_transaction *cur_trans = trans->transaction; if (!trans->chunk_bytes_reserved) return; @@ -269,8 +268,6 @@ void btrfs_trans_release_chunk_metadata(struct btrfs_trans_handle *trans) btrfs_block_rsv_release(fs_info, &fs_info->chunk_block_rsv, trans->chunk_bytes_reserved, NULL); - atomic64_sub(trans->chunk_bytes_reserved, &cur_trans->chunk_bytes_reserved); - cond_wake_up(&cur_trans->chunk_reserve_wait); trans->chunk_bytes_reserved = 0; } @@ -386,8 +383,6 @@ static noinline int join_transaction(struct btrfs_fs_info *fs_info, spin_lock_init(&cur_trans->dropped_roots_lock); INIT_LIST_HEAD(&cur_trans->releasing_ebs); spin_lock_init(&cur_trans->releasing_ebs_lock); - atomic64_set(&cur_trans->chunk_bytes_reserved, 0); - init_waitqueue_head(&cur_trans->chunk_reserve_wait); list_add_tail(&cur_trans->list, &fs_info->trans_list); extent_io_tree_init(fs_info, &cur_trans->dirty_pages, IO_TREE_TRANS_DIRTY_PAGES, fs_info->btree_inode); diff --git a/fs/btrfs/transaction.h b/fs/btrfs/transaction.h index 07d76029f598e0..a18d67796b544b 100644 --- a/fs/btrfs/transaction.h +++ b/fs/btrfs/transaction.h @@ -96,13 +96,6 @@ struct btrfs_transaction { spinlock_t releasing_ebs_lock; struct list_head releasing_ebs; - - /* - * The number of bytes currently reserved, by all transaction handles - * attached to this transaction, for metadata extents of the chunk tree. - */ - atomic64_t chunk_bytes_reserved; - wait_queue_head_t chunk_reserve_wait; }; #define __TRANS_FREEZABLE (1U << 0) From 79bd37120b149532af5b21953643ed74af69654f Mon Sep 17 00:00:00 2001 From: Filipe Manana Date: Tue, 29 Jun 2021 14:43:06 +0100 Subject: [PATCH 145/714] btrfs: rework chunk allocation to avoid exhaustion of the system chunk array Commit eafa4fd0ad0607 ("btrfs: fix exhaustion of the system chunk array due to concurrent allocations") fixed a problem that resulted in exhausting the system chunk array in the superblock when there are many tasks allocating chunks in parallel. Basically too many tasks enter the first phase of chunk allocation without previous tasks having finished their second phase of allocation, resulting in too many system chunks being allocated. That was originally observed when running the fallocate tests of stress-ng on a PowerPC machine, using a node size of 64K. However that commit also introduced a deadlock where a task in phase 1 of the chunk allocation waited for another task that had allocated a system chunk to finish its phase 2, but that other task was waiting on an extent buffer lock held by the first task, therefore resulting in both tasks not making any progress. That change was later reverted by a patch with the subject "btrfs: fix deadlock with concurrent chunk allocations involving system chunks", since there is no simple and short solution to address it and the deadlock is relatively easy to trigger on zoned filesystems, while the system chunk array exhaustion is not so common. This change reworks the chunk allocation to avoid the system chunk array exhaustion. It accomplishes that by making the first phase of chunk allocation do the updates of the device items in the chunk btree and the insertion of the new chunk item in the chunk btree. This is done while under the protection of the chunk mutex (fs_info->chunk_mutex), in the same critical section that checks for available system space, allocates a new system chunk if needed and reserves system chunk space. This way we do not have chunk space reserved until the second phase completes. The same logic is applied to chunk removal as well, since it keeps reserved system space long after it is done updating the chunk btree. For direct allocation of system chunks, the previous behaviour remains, because otherwise we would deadlock on extent buffers of the chunk btree. Changes to the chunk btree are by large done by chunk allocation and chunk removal, which first reserve chunk system space and then later do changes to the chunk btree. The other remaining cases are uncommon and correspond to adding a device, removing a device and resizing a device. All these other cases do not pre-reserve system space, they modify the chunk btree right away, so they don't hold reserved space for a long period like chunk allocation and chunk removal do. The diff of this change is huge, but more than half of it is just addition of comments describing both how things work regarding chunk allocation and removal, including both the new behavior and the parts of the old behavior that did not change. CC: stable@vger.kernel.org # 5.12+ Tested-by: Shin'ichiro Kawasaki Tested-by: Naohiro Aota Signed-off-by: Filipe Manana Tested-by: David Sterba Signed-off-by: David Sterba --- fs/btrfs/block-group.c | 285 ++++++++++++++++++++++++++++----- fs/btrfs/block-group.h | 6 +- fs/btrfs/ctree.c | 67 ++------ fs/btrfs/transaction.c | 10 +- fs/btrfs/transaction.h | 2 +- fs/btrfs/volumes.c | 355 +++++++++++++++++++++++++++++++---------- fs/btrfs/volumes.h | 5 +- 7 files changed, 546 insertions(+), 184 deletions(-) diff --git a/fs/btrfs/block-group.c b/fs/btrfs/block-group.c index a26209f9827915..c557327b454582 100644 --- a/fs/btrfs/block-group.c +++ b/fs/btrfs/block-group.c @@ -2207,6 +2207,13 @@ int btrfs_read_block_groups(struct btrfs_fs_info *info) return ret; } +/* + * This function, insert_block_group_item(), belongs to the phase 2 of chunk + * allocation. + * + * See the comment at btrfs_chunk_alloc() for details about the chunk allocation + * phases. + */ static int insert_block_group_item(struct btrfs_trans_handle *trans, struct btrfs_block_group *block_group) { @@ -2229,15 +2236,19 @@ static int insert_block_group_item(struct btrfs_trans_handle *trans, return btrfs_insert_item(trans, root, &key, &bgi, sizeof(bgi)); } +/* + * This function, btrfs_create_pending_block_groups(), belongs to the phase 2 of + * chunk allocation. + * + * See the comment at btrfs_chunk_alloc() for details about the chunk allocation + * phases. + */ void btrfs_create_pending_block_groups(struct btrfs_trans_handle *trans) { struct btrfs_fs_info *fs_info = trans->fs_info; struct btrfs_block_group *block_group; int ret = 0; - if (!trans->can_flush_pending_bgs) - return; - while (!list_empty(&trans->new_bgs)) { int index; @@ -2252,6 +2263,13 @@ void btrfs_create_pending_block_groups(struct btrfs_trans_handle *trans) ret = insert_block_group_item(trans, block_group); if (ret) btrfs_abort_transaction(trans, ret); + if (!block_group->chunk_item_inserted) { + mutex_lock(&fs_info->chunk_mutex); + ret = btrfs_chunk_alloc_add_chunk_item(trans, block_group); + mutex_unlock(&fs_info->chunk_mutex); + if (ret) + btrfs_abort_transaction(trans, ret); + } ret = btrfs_finish_chunk_alloc(trans, block_group->start, block_group->length); if (ret) @@ -2275,8 +2293,9 @@ void btrfs_create_pending_block_groups(struct btrfs_trans_handle *trans) btrfs_trans_release_chunk_metadata(trans); } -int btrfs_make_block_group(struct btrfs_trans_handle *trans, u64 bytes_used, - u64 type, u64 chunk_offset, u64 size) +struct btrfs_block_group *btrfs_make_block_group(struct btrfs_trans_handle *trans, + u64 bytes_used, u64 type, + u64 chunk_offset, u64 size) { struct btrfs_fs_info *fs_info = trans->fs_info; struct btrfs_block_group *cache; @@ -2286,7 +2305,7 @@ int btrfs_make_block_group(struct btrfs_trans_handle *trans, u64 bytes_used, cache = btrfs_create_block_group_cache(fs_info, chunk_offset); if (!cache) - return -ENOMEM; + return ERR_PTR(-ENOMEM); cache->length = size; set_free_space_tree_thresholds(cache); @@ -2300,7 +2319,7 @@ int btrfs_make_block_group(struct btrfs_trans_handle *trans, u64 bytes_used, ret = btrfs_load_block_group_zone_info(cache, true); if (ret) { btrfs_put_block_group(cache); - return ret; + return ERR_PTR(ret); } ret = exclude_super_stripes(cache); @@ -2308,7 +2327,7 @@ int btrfs_make_block_group(struct btrfs_trans_handle *trans, u64 bytes_used, /* We may have excluded something, so call this just in case */ btrfs_free_excluded_extents(cache); btrfs_put_block_group(cache); - return ret; + return ERR_PTR(ret); } add_new_free_space(cache, chunk_offset, chunk_offset + size); @@ -2335,7 +2354,7 @@ int btrfs_make_block_group(struct btrfs_trans_handle *trans, u64 bytes_used, if (ret) { btrfs_remove_free_space_cache(cache); btrfs_put_block_group(cache); - return ret; + return ERR_PTR(ret); } /* @@ -2354,7 +2373,7 @@ int btrfs_make_block_group(struct btrfs_trans_handle *trans, u64 bytes_used, btrfs_update_delayed_refs_rsv(trans); set_avail_alloc_bits(fs_info, type); - return 0; + return cache; } /* @@ -3232,11 +3251,203 @@ int btrfs_force_chunk_alloc(struct btrfs_trans_handle *trans, u64 type) return btrfs_chunk_alloc(trans, alloc_flags, CHUNK_ALLOC_FORCE); } +static int do_chunk_alloc(struct btrfs_trans_handle *trans, u64 flags) +{ + struct btrfs_block_group *bg; + int ret; + + /* + * Check if we have enough space in the system space info because we + * will need to update device items in the chunk btree and insert a new + * chunk item in the chunk btree as well. This will allocate a new + * system block group if needed. + */ + check_system_chunk(trans, flags); + + bg = btrfs_alloc_chunk(trans, flags); + if (IS_ERR(bg)) { + ret = PTR_ERR(bg); + goto out; + } + + /* + * If this is a system chunk allocation then stop right here and do not + * add the chunk item to the chunk btree. This is to prevent a deadlock + * because this system chunk allocation can be triggered while COWing + * some extent buffer of the chunk btree and while holding a lock on a + * parent extent buffer, in which case attempting to insert the chunk + * item (or update the device item) would result in a deadlock on that + * parent extent buffer. In this case defer the chunk btree updates to + * the second phase of chunk allocation and keep our reservation until + * the second phase completes. + * + * This is a rare case and can only be triggered by the very few cases + * we have where we need to touch the chunk btree outside chunk allocation + * and chunk removal. These cases are basically adding a device, removing + * a device or resizing a device. + */ + if (flags & BTRFS_BLOCK_GROUP_SYSTEM) + return 0; + + ret = btrfs_chunk_alloc_add_chunk_item(trans, bg); + /* + * Normally we are not expected to fail with -ENOSPC here, since we have + * previously reserved space in the system space_info and allocated one + * new system chunk if necessary. However there are two exceptions: + * + * 1) We may have enough free space in the system space_info but all the + * existing system block groups have a profile which can not be used + * for extent allocation. + * + * This happens when mounting in degraded mode. For example we have a + * RAID1 filesystem with 2 devices, lose one device and mount the fs + * using the other device in degraded mode. If we then allocate a chunk, + * we may have enough free space in the existing system space_info, but + * none of the block groups can be used for extent allocation since they + * have a RAID1 profile, and because we are in degraded mode with a + * single device, we are forced to allocate a new system chunk with a + * SINGLE profile. Making check_system_chunk() iterate over all system + * block groups and check if they have a usable profile and enough space + * can be slow on very large filesystems, so we tolerate the -ENOSPC and + * try again after forcing allocation of a new system chunk. Like this + * we avoid paying the cost of that search in normal circumstances, when + * we were not mounted in degraded mode; + * + * 2) We had enough free space info the system space_info, and one suitable + * block group to allocate from when we called check_system_chunk() + * above. However right after we called it, the only system block group + * with enough free space got turned into RO mode by a running scrub, + * and in this case we have to allocate a new one and retry. We only + * need do this allocate and retry once, since we have a transaction + * handle and scrub uses the commit root to search for block groups. + */ + if (ret == -ENOSPC) { + const u64 sys_flags = btrfs_system_alloc_profile(trans->fs_info); + struct btrfs_block_group *sys_bg; + + sys_bg = btrfs_alloc_chunk(trans, sys_flags); + if (IS_ERR(sys_bg)) { + ret = PTR_ERR(sys_bg); + btrfs_abort_transaction(trans, ret); + goto out; + } + + ret = btrfs_chunk_alloc_add_chunk_item(trans, sys_bg); + if (ret) { + btrfs_abort_transaction(trans, ret); + goto out; + } + + ret = btrfs_chunk_alloc_add_chunk_item(trans, bg); + if (ret) { + btrfs_abort_transaction(trans, ret); + goto out; + } + } else if (ret) { + btrfs_abort_transaction(trans, ret); + goto out; + } +out: + btrfs_trans_release_chunk_metadata(trans); + + return ret; +} + /* - * If force is CHUNK_ALLOC_FORCE: + * Chunk allocation is done in 2 phases: + * + * 1) Phase 1 - through btrfs_chunk_alloc() we allocate device extents for + * the chunk, the chunk mapping, create its block group and add the items + * that belong in the chunk btree to it - more specifically, we need to + * update device items in the chunk btree and add a new chunk item to it. + * + * 2) Phase 2 - through btrfs_create_pending_block_groups(), we add the block + * group item to the extent btree and the device extent items to the devices + * btree. + * + * This is done to prevent deadlocks. For example when COWing a node from the + * extent btree we are holding a write lock on the node's parent and if we + * trigger chunk allocation and attempted to insert the new block group item + * in the extent btree right way, we could deadlock because the path for the + * insertion can include that parent node. At first glance it seems impossible + * to trigger chunk allocation after starting a transaction since tasks should + * reserve enough transaction units (metadata space), however while that is true + * most of the time, chunk allocation may still be triggered for several reasons: + * + * 1) When reserving metadata, we check if there is enough free space in the + * metadata space_info and therefore don't trigger allocation of a new chunk. + * However later when the task actually tries to COW an extent buffer from + * the extent btree or from the device btree for example, it is forced to + * allocate a new block group (chunk) because the only one that had enough + * free space was just turned to RO mode by a running scrub for example (or + * device replace, block group reclaim thread, etc), so we can not use it + * for allocating an extent and end up being forced to allocate a new one; + * + * 2) Because we only check that the metadata space_info has enough free bytes, + * we end up not allocating a new metadata chunk in that case. However if + * the filesystem was mounted in degraded mode, none of the existing block + * groups might be suitable for extent allocation due to their incompatible + * profile (for e.g. mounting a 2 devices filesystem, where all block groups + * use a RAID1 profile, in degraded mode using a single device). In this case + * when the task attempts to COW some extent buffer of the extent btree for + * example, it will trigger allocation of a new metadata block group with a + * suitable profile (SINGLE profile in the example of the degraded mount of + * the RAID1 filesystem); + * + * 3) The task has reserved enough transaction units / metadata space, but when + * it attempts to COW an extent buffer from the extent or device btree for + * example, it does not find any free extent in any metadata block group, + * therefore forced to try to allocate a new metadata block group. + * This is because some other task allocated all available extents in the + * meanwhile - this typically happens with tasks that don't reserve space + * properly, either intentionally or as a bug. One example where this is + * done intentionally is fsync, as it does not reserve any transaction units + * and ends up allocating a variable number of metadata extents for log + * tree extent buffers. + * + * We also need this 2 phases setup when adding a device to a filesystem with + * a seed device - we must create new metadata and system chunks without adding + * any of the block group items to the chunk, extent and device btrees. If we + * did not do it this way, we would get ENOSPC when attempting to update those + * btrees, since all the chunks from the seed device are read-only. + * + * Phase 1 does the updates and insertions to the chunk btree because if we had + * it done in phase 2 and have a thundering herd of tasks allocating chunks in + * parallel, we risk having too many system chunks allocated by many tasks if + * many tasks reach phase 1 without the previous ones completing phase 2. In the + * extreme case this leads to exhaustion of the system chunk array in the + * superblock. This is easier to trigger if using a btree node/leaf size of 64K + * and with RAID filesystems (so we have more device items in the chunk btree). + * This has happened before and commit eafa4fd0ad0607 ("btrfs: fix exhaustion of + * the system chunk array due to concurrent allocations") provides more details. + * + * For allocation of system chunks, we defer the updates and insertions into the + * chunk btree to phase 2. This is to prevent deadlocks on extent buffers because + * if the chunk allocation is triggered while COWing an extent buffer of the + * chunk btree, we are holding a lock on the parent of that extent buffer and + * doing the chunk btree updates and insertions can require locking that parent. + * This is for the very few and rare cases where we update the chunk btree that + * are not chunk allocation or chunk removal: adding a device, removing a device + * or resizing a device. + * + * The reservation of system space, done through check_system_chunk(), as well + * as all the updates and insertions into the chunk btree must be done while + * holding fs_info->chunk_mutex. This is important to guarantee that while COWing + * an extent buffer from the chunks btree we never trigger allocation of a new + * system chunk, which would result in a deadlock (trying to lock twice an + * extent buffer of the chunk btree, first time before triggering the chunk + * allocation and the second time during chunk allocation while attempting to + * update the chunks btree). The system chunk array is also updated while holding + * that mutex. The same logic applies to removing chunks - we must reserve system + * space, update the chunk btree and the system chunk array in the superblock + * while holding fs_info->chunk_mutex. + * + * This function, btrfs_chunk_alloc(), belongs to phase 1. + * + * If @force is CHUNK_ALLOC_FORCE: * - return 1 if it successfully allocates a chunk, * - return errors including -ENOSPC otherwise. - * If force is NOT CHUNK_ALLOC_FORCE: + * If @force is NOT CHUNK_ALLOC_FORCE: * - return 0 if it doesn't need to allocate a new chunk, * - return 1 if it successfully allocates a chunk, * - return errors including -ENOSPC otherwise. @@ -3253,6 +3464,13 @@ int btrfs_chunk_alloc(struct btrfs_trans_handle *trans, u64 flags, /* Don't re-enter if we're already allocating a chunk */ if (trans->allocating_chunk) return -ENOSPC; + /* + * If we are removing a chunk, don't re-enter or we would deadlock. + * System space reservation and system chunk allocation is done by the + * chunk remove operation (btrfs_remove_chunk()). + */ + if (trans->removing_chunk) + return -ENOSPC; space_info = btrfs_find_space_info(fs_info, flags); ASSERT(space_info); @@ -3316,13 +3534,7 @@ int btrfs_chunk_alloc(struct btrfs_trans_handle *trans, u64 flags, force_metadata_allocation(fs_info); } - /* - * Check if we have enough space in SYSTEM chunk because we may need - * to update devices. - */ - check_system_chunk(trans, flags); - - ret = btrfs_alloc_chunk(trans, flags); + ret = do_chunk_alloc(trans, flags); trans->allocating_chunk = false; spin_lock(&space_info->lock); @@ -3341,22 +3553,6 @@ int btrfs_chunk_alloc(struct btrfs_trans_handle *trans, u64 flags, space_info->chunk_alloc = 0; spin_unlock(&space_info->lock); mutex_unlock(&fs_info->chunk_mutex); - /* - * When we allocate a new chunk we reserve space in the chunk block - * reserve to make sure we can COW nodes/leafs in the chunk tree or - * add new nodes/leafs to it if we end up needing to do it when - * inserting the chunk item and updating device items as part of the - * second phase of chunk allocation, performed by - * btrfs_finish_chunk_alloc(). So make sure we don't accumulate a - * large number of new block groups to create in our transaction - * handle's new_bgs list to avoid exhausting the chunk block reserve - * in extreme cases - like having a single transaction create many new - * block groups when starting to write out the free space caches of all - * the block groups that were made dirty during the lifetime of the - * transaction. - */ - if (trans->chunk_bytes_reserved >= (u64)SZ_2M) - btrfs_create_pending_block_groups(trans); return ret; } @@ -3409,14 +3605,31 @@ void check_system_chunk(struct btrfs_trans_handle *trans, u64 type) if (left < thresh) { u64 flags = btrfs_system_alloc_profile(fs_info); + struct btrfs_block_group *bg; /* * Ignore failure to create system chunk. We might end up not * needing it, as we might not need to COW all nodes/leafs from * the paths we visit in the chunk tree (they were already COWed * or created in the current transaction for example). + * + * Also, if our caller is allocating a system chunk, do not + * attempt to insert the chunk item in the chunk btree, as we + * could deadlock on an extent buffer since our caller may be + * COWing an extent buffer from the chunk btree. */ - ret = btrfs_alloc_chunk(trans, flags); + bg = btrfs_alloc_chunk(trans, flags); + if (IS_ERR(bg)) { + ret = PTR_ERR(bg); + } else if (!(type & BTRFS_BLOCK_GROUP_SYSTEM)) { + /* + * If we fail to add the chunk item here, we end up + * trying again at phase 2 of chunk allocation, at + * btrfs_create_pending_block_groups(). So ignore + * any error here. + */ + btrfs_chunk_alloc_add_chunk_item(trans, bg); + } } if (!ret) { diff --git a/fs/btrfs/block-group.h b/fs/btrfs/block-group.h index 7b927425dc7156..c72a71efcb1872 100644 --- a/fs/btrfs/block-group.h +++ b/fs/btrfs/block-group.h @@ -97,6 +97,7 @@ struct btrfs_block_group { unsigned int removed:1; unsigned int to_copy:1; unsigned int relocating_repair:1; + unsigned int chunk_item_inserted:1; int disk_cache_state; @@ -268,8 +269,9 @@ void btrfs_reclaim_bgs_work(struct work_struct *work); void btrfs_reclaim_bgs(struct btrfs_fs_info *fs_info); void btrfs_mark_bg_to_reclaim(struct btrfs_block_group *bg); int btrfs_read_block_groups(struct btrfs_fs_info *info); -int btrfs_make_block_group(struct btrfs_trans_handle *trans, u64 bytes_used, - u64 type, u64 chunk_offset, u64 size); +struct btrfs_block_group *btrfs_make_block_group(struct btrfs_trans_handle *trans, + u64 bytes_used, u64 type, + u64 chunk_offset, u64 size); void btrfs_create_pending_block_groups(struct btrfs_trans_handle *trans); int btrfs_inc_block_group_ro(struct btrfs_block_group *cache, bool do_chunk_alloc); diff --git a/fs/btrfs/ctree.c b/fs/btrfs/ctree.c index 4bc3ca2cbd7d41..c5c08c87e1303c 100644 --- a/fs/btrfs/ctree.c +++ b/fs/btrfs/ctree.c @@ -364,49 +364,6 @@ static noinline int update_ref_for_cow(struct btrfs_trans_handle *trans, return 0; } -static struct extent_buffer *alloc_tree_block_no_bg_flush( - struct btrfs_trans_handle *trans, - struct btrfs_root *root, - u64 parent_start, - const struct btrfs_disk_key *disk_key, - int level, - u64 hint, - u64 empty_size, - enum btrfs_lock_nesting nest) -{ - struct btrfs_fs_info *fs_info = root->fs_info; - struct extent_buffer *ret; - - /* - * If we are COWing a node/leaf from the extent, chunk, device or free - * space trees, make sure that we do not finish block group creation of - * pending block groups. We do this to avoid a deadlock. - * COWing can result in allocation of a new chunk, and flushing pending - * block groups (btrfs_create_pending_block_groups()) can be triggered - * when finishing allocation of a new chunk. Creation of a pending block - * group modifies the extent, chunk, device and free space trees, - * therefore we could deadlock with ourselves since we are holding a - * lock on an extent buffer that btrfs_create_pending_block_groups() may - * try to COW later. - * For similar reasons, we also need to delay flushing pending block - * groups when splitting a leaf or node, from one of those trees, since - * we are holding a write lock on it and its parent or when inserting a - * new root node for one of those trees. - */ - if (root == fs_info->extent_root || - root == fs_info->chunk_root || - root == fs_info->dev_root || - root == fs_info->free_space_root) - trans->can_flush_pending_bgs = false; - - ret = btrfs_alloc_tree_block(trans, root, parent_start, - root->root_key.objectid, disk_key, level, - hint, empty_size, nest); - trans->can_flush_pending_bgs = true; - - return ret; -} - /* * does the dirty work in cow of a single block. The parent block (if * supplied) is updated to point to the new cow copy. The new buffer is marked @@ -455,8 +412,9 @@ static noinline int __btrfs_cow_block(struct btrfs_trans_handle *trans, if ((root->root_key.objectid == BTRFS_TREE_RELOC_OBJECTID) && parent) parent_start = parent->start; - cow = alloc_tree_block_no_bg_flush(trans, root, parent_start, &disk_key, - level, search_start, empty_size, nest); + cow = btrfs_alloc_tree_block(trans, root, parent_start, + root->root_key.objectid, &disk_key, level, + search_start, empty_size, nest); if (IS_ERR(cow)) return PTR_ERR(cow); @@ -2458,9 +2416,9 @@ static noinline int insert_new_root(struct btrfs_trans_handle *trans, else btrfs_node_key(lower, &lower_key, 0); - c = alloc_tree_block_no_bg_flush(trans, root, 0, &lower_key, level, - root->node->start, 0, - BTRFS_NESTING_NEW_ROOT); + c = btrfs_alloc_tree_block(trans, root, 0, root->root_key.objectid, + &lower_key, level, root->node->start, 0, + BTRFS_NESTING_NEW_ROOT); if (IS_ERR(c)) return PTR_ERR(c); @@ -2589,8 +2547,9 @@ static noinline int split_node(struct btrfs_trans_handle *trans, mid = (c_nritems + 1) / 2; btrfs_node_key(c, &disk_key, mid); - split = alloc_tree_block_no_bg_flush(trans, root, 0, &disk_key, level, - c->start, 0, BTRFS_NESTING_SPLIT); + split = btrfs_alloc_tree_block(trans, root, 0, root->root_key.objectid, + &disk_key, level, c->start, 0, + BTRFS_NESTING_SPLIT); if (IS_ERR(split)) return PTR_ERR(split); @@ -3381,10 +3340,10 @@ static noinline int split_leaf(struct btrfs_trans_handle *trans, * BTRFS_NESTING_SPLIT_THE_SPLITTENING if we need to, but for now just * use BTRFS_NESTING_NEW_ROOT. */ - right = alloc_tree_block_no_bg_flush(trans, root, 0, &disk_key, 0, - l->start, 0, num_doubles ? - BTRFS_NESTING_NEW_ROOT : - BTRFS_NESTING_SPLIT); + right = btrfs_alloc_tree_block(trans, root, 0, root->root_key.objectid, + &disk_key, 0, l->start, 0, + num_doubles ? BTRFS_NESTING_NEW_ROOT : + BTRFS_NESTING_SPLIT); if (IS_ERR(right)) return PTR_ERR(right); diff --git a/fs/btrfs/transaction.c b/fs/btrfs/transaction.c index 443c348bc6f3f5..14b9fdc8aaa9a9 100644 --- a/fs/btrfs/transaction.c +++ b/fs/btrfs/transaction.c @@ -254,8 +254,11 @@ static inline int extwriter_counter_read(struct btrfs_transaction *trans) } /* - * To be called after all the new block groups attached to the transaction - * handle have been created (btrfs_create_pending_block_groups()). + * To be called after doing the chunk btree updates right after allocating a new + * chunk (after btrfs_chunk_alloc_add_chunk_item() is called), when removing a + * chunk after all chunk btree updates and after finishing the second phase of + * chunk allocation (btrfs_create_pending_block_groups()) in case some block + * group had its chunk item insertion delayed to the second phase. */ void btrfs_trans_release_chunk_metadata(struct btrfs_trans_handle *trans) { @@ -264,8 +267,6 @@ void btrfs_trans_release_chunk_metadata(struct btrfs_trans_handle *trans) if (!trans->chunk_bytes_reserved) return; - WARN_ON_ONCE(!list_empty(&trans->new_bgs)); - btrfs_block_rsv_release(fs_info, &fs_info->chunk_block_rsv, trans->chunk_bytes_reserved, NULL); trans->chunk_bytes_reserved = 0; @@ -696,7 +697,6 @@ start_transaction(struct btrfs_root *root, unsigned int num_items, h->fs_info = root->fs_info; h->type = type; - h->can_flush_pending_bgs = true; INIT_LIST_HEAD(&h->new_bgs); smp_mb(); diff --git a/fs/btrfs/transaction.h b/fs/btrfs/transaction.h index a18d67796b544b..ba45065f945118 100644 --- a/fs/btrfs/transaction.h +++ b/fs/btrfs/transaction.h @@ -132,7 +132,7 @@ struct btrfs_trans_handle { short aborted; bool adding_csums; bool allocating_chunk; - bool can_flush_pending_bgs; + bool removing_chunk; bool reloc_reserved; bool in_fsync; struct btrfs_root *root; diff --git a/fs/btrfs/volumes.c b/fs/btrfs/volumes.c index 782e16795bc40c..c6c14315b1c95e 100644 --- a/fs/btrfs/volumes.c +++ b/fs/btrfs/volumes.c @@ -1745,19 +1745,14 @@ static int btrfs_free_dev_extent(struct btrfs_trans_handle *trans, extent = btrfs_item_ptr(leaf, path->slots[0], struct btrfs_dev_extent); } else { - btrfs_handle_fs_error(fs_info, ret, "Slot search failed"); goto out; } *dev_extent_len = btrfs_dev_extent_length(leaf, extent); ret = btrfs_del_item(trans, root, path); - if (ret) { - btrfs_handle_fs_error(fs_info, ret, - "Failed to remove dev extent item"); - } else { + if (ret == 0) set_bit(BTRFS_TRANS_HAVE_FREE_BGS, &trans->transaction->flags); - } out: btrfs_free_path(path); return ret; @@ -2942,7 +2937,7 @@ static int btrfs_del_sys_chunk(struct btrfs_fs_info *fs_info, u64 chunk_offset) u32 cur; struct btrfs_key key; - mutex_lock(&fs_info->chunk_mutex); + lockdep_assert_held(&fs_info->chunk_mutex); array_size = btrfs_super_sys_array_size(super_copy); ptr = super_copy->sys_chunk_array; @@ -2972,7 +2967,6 @@ static int btrfs_del_sys_chunk(struct btrfs_fs_info *fs_info, u64 chunk_offset) cur += len; } } - mutex_unlock(&fs_info->chunk_mutex); return ret; } @@ -3012,6 +3006,29 @@ struct extent_map *btrfs_get_chunk_map(struct btrfs_fs_info *fs_info, return em; } +static int remove_chunk_item(struct btrfs_trans_handle *trans, + struct map_lookup *map, u64 chunk_offset) +{ + int i; + + /* + * Removing chunk items and updating the device items in the chunks btree + * requires holding the chunk_mutex. + * See the comment at btrfs_chunk_alloc() for the details. + */ + lockdep_assert_held(&trans->fs_info->chunk_mutex); + + for (i = 0; i < map->num_stripes; i++) { + int ret; + + ret = btrfs_update_device(trans, map->stripes[i].dev); + if (ret) + return ret; + } + + return btrfs_free_chunk(trans, chunk_offset); +} + int btrfs_remove_chunk(struct btrfs_trans_handle *trans, u64 chunk_offset) { struct btrfs_fs_info *fs_info = trans->fs_info; @@ -3032,14 +3049,16 @@ int btrfs_remove_chunk(struct btrfs_trans_handle *trans, u64 chunk_offset) return PTR_ERR(em); } map = em->map_lookup; - mutex_lock(&fs_info->chunk_mutex); - check_system_chunk(trans, map->type); - mutex_unlock(&fs_info->chunk_mutex); /* - * Take the device list mutex to prevent races with the final phase of - * a device replace operation that replaces the device object associated - * with map stripes (dev-replace.c:btrfs_dev_replace_finishing()). + * First delete the device extent items from the devices btree. + * We take the device_list_mutex to avoid racing with the finishing phase + * of a device replace operation. See the comment below before acquiring + * fs_info->chunk_mutex. Note that here we do not acquire the chunk_mutex + * because that can result in a deadlock when deleting the device extent + * items from the devices btree - COWing an extent buffer from the btree + * may result in allocating a new metadata chunk, which would attempt to + * lock again fs_info->chunk_mutex. */ mutex_lock(&fs_devices->device_list_mutex); for (i = 0; i < map->num_stripes; i++) { @@ -3061,18 +3080,73 @@ int btrfs_remove_chunk(struct btrfs_trans_handle *trans, u64 chunk_offset) btrfs_clear_space_info_full(fs_info); mutex_unlock(&fs_info->chunk_mutex); } + } + mutex_unlock(&fs_devices->device_list_mutex); - ret = btrfs_update_device(trans, device); + /* + * We acquire fs_info->chunk_mutex for 2 reasons: + * + * 1) Just like with the first phase of the chunk allocation, we must + * reserve system space, do all chunk btree updates and deletions, and + * update the system chunk array in the superblock while holding this + * mutex. This is for similar reasons as explained on the comment at + * the top of btrfs_chunk_alloc(); + * + * 2) Prevent races with the final phase of a device replace operation + * that replaces the device object associated with the map's stripes, + * because the device object's id can change at any time during that + * final phase of the device replace operation + * (dev-replace.c:btrfs_dev_replace_finishing()), so we could grab the + * replaced device and then see it with an ID of + * BTRFS_DEV_REPLACE_DEVID, which would cause a failure when updating + * the device item, which does not exists on the chunk btree. + * The finishing phase of device replace acquires both the + * device_list_mutex and the chunk_mutex, in that order, so we are + * safe by just acquiring the chunk_mutex. + */ + trans->removing_chunk = true; + mutex_lock(&fs_info->chunk_mutex); + + check_system_chunk(trans, map->type); + + ret = remove_chunk_item(trans, map, chunk_offset); + /* + * Normally we should not get -ENOSPC since we reserved space before + * through the call to check_system_chunk(). + * + * Despite our system space_info having enough free space, we may not + * be able to allocate extents from its block groups, because all have + * an incompatible profile, which will force us to allocate a new system + * block group with the right profile, or right after we called + * check_system_space() above, a scrub turned the only system block group + * with enough free space into RO mode. + * This is explained with more detail at do_chunk_alloc(). + * + * So if we get -ENOSPC, allocate a new system chunk and retry once. + */ + if (ret == -ENOSPC) { + const u64 sys_flags = btrfs_system_alloc_profile(fs_info); + struct btrfs_block_group *sys_bg; + + sys_bg = btrfs_alloc_chunk(trans, sys_flags); + if (IS_ERR(sys_bg)) { + ret = PTR_ERR(sys_bg); + btrfs_abort_transaction(trans, ret); + goto out; + } + + ret = btrfs_chunk_alloc_add_chunk_item(trans, sys_bg); if (ret) { - mutex_unlock(&fs_devices->device_list_mutex); btrfs_abort_transaction(trans, ret); goto out; } - } - mutex_unlock(&fs_devices->device_list_mutex); - ret = btrfs_free_chunk(trans, chunk_offset); - if (ret) { + ret = remove_chunk_item(trans, map, chunk_offset); + if (ret) { + btrfs_abort_transaction(trans, ret); + goto out; + } + } else if (ret) { btrfs_abort_transaction(trans, ret); goto out; } @@ -3087,6 +3161,15 @@ int btrfs_remove_chunk(struct btrfs_trans_handle *trans, u64 chunk_offset) } } + mutex_unlock(&fs_info->chunk_mutex); + trans->removing_chunk = false; + + /* + * We are done with chunk btree updates and deletions, so release the + * system space we previously reserved (with check_system_chunk()). + */ + btrfs_trans_release_chunk_metadata(trans); + ret = btrfs_remove_block_group(trans, chunk_offset, em); if (ret) { btrfs_abort_transaction(trans, ret); @@ -3094,6 +3177,10 @@ int btrfs_remove_chunk(struct btrfs_trans_handle *trans, u64 chunk_offset) } out: + if (trans->removing_chunk) { + mutex_unlock(&fs_info->chunk_mutex); + trans->removing_chunk = false; + } /* once for us */ free_extent_map(em); return ret; @@ -4860,13 +4947,12 @@ static int btrfs_add_system_chunk(struct btrfs_fs_info *fs_info, u32 array_size; u8 *ptr; - mutex_lock(&fs_info->chunk_mutex); + lockdep_assert_held(&fs_info->chunk_mutex); + array_size = btrfs_super_sys_array_size(super_copy); if (array_size + item_size + sizeof(disk_key) - > BTRFS_SYSTEM_CHUNK_ARRAY_SIZE) { - mutex_unlock(&fs_info->chunk_mutex); + > BTRFS_SYSTEM_CHUNK_ARRAY_SIZE) return -EFBIG; - } ptr = super_copy->sys_chunk_array + array_size; btrfs_cpu_key_to_disk(&disk_key, key); @@ -4875,7 +4961,6 @@ static int btrfs_add_system_chunk(struct btrfs_fs_info *fs_info, memcpy(ptr, chunk, item_size); item_size += sizeof(disk_key); btrfs_set_super_sys_array_size(super_copy, array_size + item_size); - mutex_unlock(&fs_info->chunk_mutex); return 0; } @@ -5225,13 +5310,14 @@ static int decide_stripe_size(struct btrfs_fs_devices *fs_devices, } } -static int create_chunk(struct btrfs_trans_handle *trans, +static struct btrfs_block_group *create_chunk(struct btrfs_trans_handle *trans, struct alloc_chunk_ctl *ctl, struct btrfs_device_info *devices_info) { struct btrfs_fs_info *info = trans->fs_info; struct map_lookup *map = NULL; struct extent_map_tree *em_tree; + struct btrfs_block_group *block_group; struct extent_map *em; u64 start = ctl->start; u64 type = ctl->type; @@ -5241,7 +5327,7 @@ static int create_chunk(struct btrfs_trans_handle *trans, map = kmalloc(map_lookup_size(ctl->num_stripes), GFP_NOFS); if (!map) - return -ENOMEM; + return ERR_PTR(-ENOMEM); map->num_stripes = ctl->num_stripes; for (i = 0; i < ctl->ndevs; ++i) { @@ -5263,7 +5349,7 @@ static int create_chunk(struct btrfs_trans_handle *trans, em = alloc_extent_map(); if (!em) { kfree(map); - return -ENOMEM; + return ERR_PTR(-ENOMEM); } set_bit(EXTENT_FLAG_FS_MAPPING, &em->flags); em->map_lookup = map; @@ -5279,12 +5365,12 @@ static int create_chunk(struct btrfs_trans_handle *trans, if (ret) { write_unlock(&em_tree->lock); free_extent_map(em); - return ret; + return ERR_PTR(ret); } write_unlock(&em_tree->lock); - ret = btrfs_make_block_group(trans, 0, type, start, ctl->chunk_size); - if (ret) + block_group = btrfs_make_block_group(trans, 0, type, start, ctl->chunk_size); + if (IS_ERR(block_group)) goto error_del_extent; for (i = 0; i < map->num_stripes; i++) { @@ -5304,7 +5390,7 @@ static int create_chunk(struct btrfs_trans_handle *trans, check_raid56_incompat_flag(info, type); check_raid1c34_incompat_flag(info, type); - return 0; + return block_group; error_del_extent: write_lock(&em_tree->lock); @@ -5316,34 +5402,36 @@ static int create_chunk(struct btrfs_trans_handle *trans, /* One for the tree reference */ free_extent_map(em); - return ret; + return block_group; } -int btrfs_alloc_chunk(struct btrfs_trans_handle *trans, u64 type) +struct btrfs_block_group *btrfs_alloc_chunk(struct btrfs_trans_handle *trans, + u64 type) { struct btrfs_fs_info *info = trans->fs_info; struct btrfs_fs_devices *fs_devices = info->fs_devices; struct btrfs_device_info *devices_info = NULL; struct alloc_chunk_ctl ctl; + struct btrfs_block_group *block_group; int ret; lockdep_assert_held(&info->chunk_mutex); if (!alloc_profile_is_valid(type, 0)) { ASSERT(0); - return -EINVAL; + return ERR_PTR(-EINVAL); } if (list_empty(&fs_devices->alloc_list)) { if (btrfs_test_opt(info, ENOSPC_DEBUG)) btrfs_debug(info, "%s: no writable device", __func__); - return -ENOSPC; + return ERR_PTR(-ENOSPC); } if (!(type & BTRFS_BLOCK_GROUP_TYPE_MASK)) { btrfs_err(info, "invalid chunk type 0x%llx requested", type); ASSERT(0); - return -EINVAL; + return ERR_PTR(-EINVAL); } ctl.start = find_next_chunk(info); @@ -5353,46 +5441,43 @@ int btrfs_alloc_chunk(struct btrfs_trans_handle *trans, u64 type) devices_info = kcalloc(fs_devices->rw_devices, sizeof(*devices_info), GFP_NOFS); if (!devices_info) - return -ENOMEM; + return ERR_PTR(-ENOMEM); ret = gather_device_info(fs_devices, &ctl, devices_info); - if (ret < 0) + if (ret < 0) { + block_group = ERR_PTR(ret); goto out; + } ret = decide_stripe_size(fs_devices, &ctl, devices_info); - if (ret < 0) + if (ret < 0) { + block_group = ERR_PTR(ret); goto out; + } - ret = create_chunk(trans, &ctl, devices_info); + block_group = create_chunk(trans, &ctl, devices_info); out: kfree(devices_info); - return ret; + return block_group; } /* - * Chunk allocation falls into two parts. The first part does work - * that makes the new allocated chunk usable, but does not do any operation - * that modifies the chunk tree. The second part does the work that - * requires modifying the chunk tree. This division is important for the - * bootstrap process of adding storage to a seed btrfs. + * This function, btrfs_finish_chunk_alloc(), belongs to phase 2. + * + * See the comment at btrfs_chunk_alloc() for details about the chunk allocation + * phases. */ int btrfs_finish_chunk_alloc(struct btrfs_trans_handle *trans, u64 chunk_offset, u64 chunk_size) { struct btrfs_fs_info *fs_info = trans->fs_info; - struct btrfs_root *extent_root = fs_info->extent_root; - struct btrfs_root *chunk_root = fs_info->chunk_root; - struct btrfs_key key; struct btrfs_device *device; - struct btrfs_chunk *chunk; - struct btrfs_stripe *stripe; struct extent_map *em; struct map_lookup *map; - size_t item_size; u64 dev_offset; u64 stripe_size; - int i = 0; + int i; int ret = 0; em = btrfs_get_chunk_map(fs_info, chunk_offset, chunk_size); @@ -5400,53 +5485,117 @@ int btrfs_finish_chunk_alloc(struct btrfs_trans_handle *trans, return PTR_ERR(em); map = em->map_lookup; - item_size = btrfs_chunk_item_size(map->num_stripes); stripe_size = em->orig_block_len; - chunk = kzalloc(item_size, GFP_NOFS); - if (!chunk) { - ret = -ENOMEM; - goto out; - } - /* * Take the device list mutex to prevent races with the final phase of * a device replace operation that replaces the device object associated * with the map's stripes, because the device object's id can change * at any time during that final phase of the device replace operation - * (dev-replace.c:btrfs_dev_replace_finishing()). + * (dev-replace.c:btrfs_dev_replace_finishing()), so we could grab the + * replaced device and then see it with an ID of BTRFS_DEV_REPLACE_DEVID, + * resulting in persisting a device extent item with such ID. */ mutex_lock(&fs_info->fs_devices->device_list_mutex); for (i = 0; i < map->num_stripes; i++) { device = map->stripes[i].dev; dev_offset = map->stripes[i].physical; - ret = btrfs_update_device(trans, device); - if (ret) - break; ret = btrfs_alloc_dev_extent(trans, device, chunk_offset, dev_offset, stripe_size); if (ret) break; } - if (ret) { - mutex_unlock(&fs_info->fs_devices->device_list_mutex); + mutex_unlock(&fs_info->fs_devices->device_list_mutex); + + free_extent_map(em); + return ret; +} + +/* + * This function, btrfs_chunk_alloc_add_chunk_item(), typically belongs to the + * phase 1 of chunk allocation. It belongs to phase 2 only when allocating system + * chunks. + * + * See the comment at btrfs_chunk_alloc() for details about the chunk allocation + * phases. + */ +int btrfs_chunk_alloc_add_chunk_item(struct btrfs_trans_handle *trans, + struct btrfs_block_group *bg) +{ + struct btrfs_fs_info *fs_info = trans->fs_info; + struct btrfs_root *extent_root = fs_info->extent_root; + struct btrfs_root *chunk_root = fs_info->chunk_root; + struct btrfs_key key; + struct btrfs_chunk *chunk; + struct btrfs_stripe *stripe; + struct extent_map *em; + struct map_lookup *map; + size_t item_size; + int i; + int ret; + + /* + * We take the chunk_mutex for 2 reasons: + * + * 1) Updates and insertions in the chunk btree must be done while holding + * the chunk_mutex, as well as updating the system chunk array in the + * superblock. See the comment on top of btrfs_chunk_alloc() for the + * details; + * + * 2) To prevent races with the final phase of a device replace operation + * that replaces the device object associated with the map's stripes, + * because the device object's id can change at any time during that + * final phase of the device replace operation + * (dev-replace.c:btrfs_dev_replace_finishing()), so we could grab the + * replaced device and then see it with an ID of BTRFS_DEV_REPLACE_DEVID, + * which would cause a failure when updating the device item, which does + * not exists, or persisting a stripe of the chunk item with such ID. + * Here we can't use the device_list_mutex because our caller already + * has locked the chunk_mutex, and the final phase of device replace + * acquires both mutexes - first the device_list_mutex and then the + * chunk_mutex. Using any of those two mutexes protects us from a + * concurrent device replace. + */ + lockdep_assert_held(&fs_info->chunk_mutex); + + em = btrfs_get_chunk_map(fs_info, bg->start, bg->length); + if (IS_ERR(em)) { + ret = PTR_ERR(em); + btrfs_abort_transaction(trans, ret); + return ret; + } + + map = em->map_lookup; + item_size = btrfs_chunk_item_size(map->num_stripes); + + chunk = kzalloc(item_size, GFP_NOFS); + if (!chunk) { + ret = -ENOMEM; + btrfs_abort_transaction(trans, ret); goto out; } + for (i = 0; i < map->num_stripes; i++) { + struct btrfs_device *device = map->stripes[i].dev; + + ret = btrfs_update_device(trans, device); + if (ret) + goto out; + } + stripe = &chunk->stripe; for (i = 0; i < map->num_stripes; i++) { - device = map->stripes[i].dev; - dev_offset = map->stripes[i].physical; + struct btrfs_device *device = map->stripes[i].dev; + const u64 dev_offset = map->stripes[i].physical; btrfs_set_stack_stripe_devid(stripe, device->devid); btrfs_set_stack_stripe_offset(stripe, dev_offset); memcpy(stripe->dev_uuid, device->uuid, BTRFS_UUID_SIZE); stripe++; } - mutex_unlock(&fs_info->fs_devices->device_list_mutex); - btrfs_set_stack_chunk_length(chunk, chunk_size); + btrfs_set_stack_chunk_length(chunk, bg->length); btrfs_set_stack_chunk_owner(chunk, extent_root->root_key.objectid); btrfs_set_stack_chunk_stripe_len(chunk, map->stripe_len); btrfs_set_stack_chunk_type(chunk, map->type); @@ -5458,15 +5607,18 @@ int btrfs_finish_chunk_alloc(struct btrfs_trans_handle *trans, key.objectid = BTRFS_FIRST_CHUNK_TREE_OBJECTID; key.type = BTRFS_CHUNK_ITEM_KEY; - key.offset = chunk_offset; + key.offset = bg->start; ret = btrfs_insert_item(trans, chunk_root, &key, chunk, item_size); - if (ret == 0 && map->type & BTRFS_BLOCK_GROUP_SYSTEM) { - /* - * TODO: Cleanup of inserted chunk root in case of - * failure. - */ + if (ret) + goto out; + + bg->chunk_item_inserted = 1; + + if (map->type & BTRFS_BLOCK_GROUP_SYSTEM) { ret = btrfs_add_system_chunk(fs_info, &key, chunk, item_size); + if (ret) + goto out; } out: @@ -5479,16 +5631,41 @@ static noinline int init_first_rw_device(struct btrfs_trans_handle *trans) { struct btrfs_fs_info *fs_info = trans->fs_info; u64 alloc_profile; - int ret; + struct btrfs_block_group *meta_bg; + struct btrfs_block_group *sys_bg; + + /* + * When adding a new device for sprouting, the seed device is read-only + * so we must first allocate a metadata and a system chunk. But before + * adding the block group items to the extent, device and chunk btrees, + * we must first: + * + * 1) Create both chunks without doing any changes to the btrees, as + * otherwise we would get -ENOSPC since the block groups from the + * seed device are read-only; + * + * 2) Add the device item for the new sprout device - finishing the setup + * of a new block group requires updating the device item in the chunk + * btree, so it must exist when we attempt to do it. The previous step + * ensures this does not fail with -ENOSPC. + * + * After that we can add the block group items to their btrees: + * update existing device item in the chunk btree, add a new block group + * item to the extent btree, add a new chunk item to the chunk btree and + * finally add the new device extent items to the devices btree. + */ alloc_profile = btrfs_metadata_alloc_profile(fs_info); - ret = btrfs_alloc_chunk(trans, alloc_profile); - if (ret) - return ret; + meta_bg = btrfs_alloc_chunk(trans, alloc_profile); + if (IS_ERR(meta_bg)) + return PTR_ERR(meta_bg); alloc_profile = btrfs_system_alloc_profile(fs_info); - ret = btrfs_alloc_chunk(trans, alloc_profile); - return ret; + sys_bg = btrfs_alloc_chunk(trans, alloc_profile); + if (IS_ERR(sys_bg)) + return PTR_ERR(sys_bg); + + return 0; } static inline int btrfs_chunk_max_errors(struct map_lookup *map) @@ -7415,10 +7592,18 @@ int btrfs_read_chunk_tree(struct btrfs_fs_info *fs_info) total_dev++; } else if (found_key.type == BTRFS_CHUNK_ITEM_KEY) { struct btrfs_chunk *chunk; + + /* + * We are only called at mount time, so no need to take + * fs_info->chunk_mutex. Plus, to avoid lockdep warnings, + * we always lock first fs_info->chunk_mutex before + * acquiring any locks on the chunk tree. This is a + * requirement for chunk allocation, see the comment on + * top of btrfs_chunk_alloc() for details. + */ + ASSERT(!test_bit(BTRFS_FS_OPEN, &fs_info->flags)); chunk = btrfs_item_ptr(leaf, slot, struct btrfs_chunk); - mutex_lock(&fs_info->chunk_mutex); ret = read_one_chunk(&found_key, leaf, chunk); - mutex_unlock(&fs_info->chunk_mutex); if (ret) goto error; } diff --git a/fs/btrfs/volumes.h b/fs/btrfs/volumes.h index c7fc7caf575c0d..55a8ba244716b9 100644 --- a/fs/btrfs/volumes.h +++ b/fs/btrfs/volumes.h @@ -450,7 +450,8 @@ int btrfs_get_io_geometry(struct btrfs_fs_info *fs_info, struct extent_map *map, struct btrfs_io_geometry *io_geom); int btrfs_read_sys_array(struct btrfs_fs_info *fs_info); int btrfs_read_chunk_tree(struct btrfs_fs_info *fs_info); -int btrfs_alloc_chunk(struct btrfs_trans_handle *trans, u64 type); +struct btrfs_block_group *btrfs_alloc_chunk(struct btrfs_trans_handle *trans, + u64 type); void btrfs_mapping_tree_free(struct extent_map_tree *tree); blk_status_t btrfs_map_bio(struct btrfs_fs_info *fs_info, struct bio *bio, int mirror_num); @@ -509,6 +510,8 @@ unsigned long btrfs_full_stripe_len(struct btrfs_fs_info *fs_info, u64 logical); int btrfs_finish_chunk_alloc(struct btrfs_trans_handle *trans, u64 chunk_offset, u64 chunk_size); +int btrfs_chunk_alloc_add_chunk_item(struct btrfs_trans_handle *trans, + struct btrfs_block_group *bg); int btrfs_remove_chunk(struct btrfs_trans_handle *trans, u64 chunk_offset); struct extent_map *btrfs_get_chunk_map(struct btrfs_fs_info *fs_info, u64 logical, u64 length); From abb99cfdaf0759f8a619e5fecf52ccccdf310c8c Mon Sep 17 00:00:00 2001 From: Naohiro Aota Date: Mon, 28 Jun 2021 17:57:28 +0900 Subject: [PATCH 146/714] btrfs: properly split extent_map for REQ_OP_ZONE_APPEND Damien reported a test failure with btrfs/209. The test itself ran fine, but the fsck ran afterwards reported a corrupted filesystem. The filesystem corruption happens because we're splitting an extent and then writing the extent twice. We have to split the extent though, because we're creating too large extents for a REQ_OP_ZONE_APPEND operation. When dumping the extent tree, we can see two EXTENT_ITEMs at the same start address but different lengths. $ btrfs inspect dump-tree /dev/nullb1 -t extent ... item 19 key (269484032 EXTENT_ITEM 126976) itemoff 15470 itemsize 53 refs 1 gen 7 flags DATA extent data backref root FS_TREE objectid 257 offset 786432 count 1 item 20 key (269484032 EXTENT_ITEM 262144) itemoff 15417 itemsize 53 refs 1 gen 7 flags DATA extent data backref root FS_TREE objectid 257 offset 786432 count 1 The duplicated EXTENT_ITEMs originally come from wrongly split extent_map in extract_ordered_extent(). Since extract_ordered_extent() uses create_io_em() to split an existing extent_map, we will have split->orig_start != split->start. Then, it will be logged with non-zero "extent data offset". Finally, the logged entries are replayed into a duplicated EXTENT_ITEM. Introduce and use proper splitting function for extent_map. The function is intended to be simple and specific usage for extract_ordered_extent() e.g. not supporting compression case (we do not allow splitting compressed extent_map anyway). There was a question raised by Qu, in summary why we want to split the extent map (and not the bio): The problem is not the limit on the zone end, which as you mention is the same as the block group end. The problem is that data write use zone append (ZA) operations. ZA BIOs cannot be split so a large extent may need to be processed with multiple ZA BIOs, While that is also true for regular writes, the major difference is that ZA are "nameless" write operation giving back the written sectors on completion. And ZA operations may be reordered by the block layer (not intentionally though). Combine both of these characteristics and you can see that the data for a large extent may end up being shuffled when written resulting in data corruption and the impossibility to map the extent to some start sector. To avoid this problem, zoned btrfs uses the principle "one data extent == one ZA BIO". So large extents need to be split. This is unfortunate, but we can revisit this later and optimize, e.g. merge back together the fragments of an extent once written if they actually were written sequentially in the zone. Reported-by: Damien Le Moal Fixes: d22002fd37bd ("btrfs: zoned: split ordered extent when bio is sent") CC: stable@vger.kernel.org # 5.12+ CC: Johannes Thumshirn Signed-off-by: Naohiro Aota Signed-off-by: David Sterba --- fs/btrfs/inode.c | 147 +++++++++++++++++++++++++++++++++++++---------- 1 file changed, 118 insertions(+), 29 deletions(-) diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c index e6eb20987351d6..8f60314c36c55e 100644 --- a/fs/btrfs/inode.c +++ b/fs/btrfs/inode.c @@ -2271,13 +2271,127 @@ static blk_status_t btrfs_submit_bio_start(struct inode *inode, struct bio *bio, return btrfs_csum_one_bio(BTRFS_I(inode), bio, 0, 0); } +/* + * Split an extent_map at [start, start + len] + * + * This function is intended to be used only for extract_ordered_extent(). + */ +static int split_zoned_em(struct btrfs_inode *inode, u64 start, u64 len, + u64 pre, u64 post) +{ + struct extent_map_tree *em_tree = &inode->extent_tree; + struct extent_map *em; + struct extent_map *split_pre = NULL; + struct extent_map *split_mid = NULL; + struct extent_map *split_post = NULL; + int ret = 0; + int modified; + unsigned long flags; + + /* Sanity check */ + if (pre == 0 && post == 0) + return 0; + + split_pre = alloc_extent_map(); + if (pre) + split_mid = alloc_extent_map(); + if (post) + split_post = alloc_extent_map(); + if (!split_pre || (pre && !split_mid) || (post && !split_post)) { + ret = -ENOMEM; + goto out; + } + + ASSERT(pre + post < len); + + lock_extent(&inode->io_tree, start, start + len - 1); + write_lock(&em_tree->lock); + em = lookup_extent_mapping(em_tree, start, len); + if (!em) { + ret = -EIO; + goto out_unlock; + } + + ASSERT(em->len == len); + ASSERT(!test_bit(EXTENT_FLAG_COMPRESSED, &em->flags)); + ASSERT(em->block_start < EXTENT_MAP_LAST_BYTE); + + flags = em->flags; + clear_bit(EXTENT_FLAG_PINNED, &em->flags); + clear_bit(EXTENT_FLAG_LOGGING, &flags); + modified = !list_empty(&em->list); + + /* First, replace the em with a new extent_map starting from * em->start */ + split_pre->start = em->start; + split_pre->len = (pre ? pre : em->len - post); + split_pre->orig_start = split_pre->start; + split_pre->block_start = em->block_start; + split_pre->block_len = split_pre->len; + split_pre->orig_block_len = split_pre->block_len; + split_pre->ram_bytes = split_pre->len; + split_pre->flags = flags; + split_pre->compress_type = em->compress_type; + split_pre->generation = em->generation; + + replace_extent_mapping(em_tree, em, split_pre, modified); + + /* + * Now we only have an extent_map at: + * [em->start, em->start + pre] if pre != 0 + * [em->start, em->start + em->len - post] if pre == 0 + */ + + if (pre) { + /* Insert the middle extent_map */ + split_mid->start = em->start + pre; + split_mid->len = em->len - pre - post; + split_mid->orig_start = split_mid->start; + split_mid->block_start = em->block_start + pre; + split_mid->block_len = split_mid->len; + split_mid->orig_block_len = split_mid->block_len; + split_mid->ram_bytes = split_mid->len; + split_mid->flags = flags; + split_mid->compress_type = em->compress_type; + split_mid->generation = em->generation; + add_extent_mapping(em_tree, split_mid, modified); + } + + if (post) { + split_post->start = em->start + em->len - post; + split_post->len = post; + split_post->orig_start = split_post->start; + split_post->block_start = em->block_start + em->len - post; + split_post->block_len = split_post->len; + split_post->orig_block_len = split_post->block_len; + split_post->ram_bytes = split_post->len; + split_post->flags = flags; + split_post->compress_type = em->compress_type; + split_post->generation = em->generation; + add_extent_mapping(em_tree, split_post, modified); + } + + /* Once for us */ + free_extent_map(em); + /* Once for the tree */ + free_extent_map(em); + +out_unlock: + write_unlock(&em_tree->lock); + unlock_extent(&inode->io_tree, start, start + len - 1); +out: + free_extent_map(split_pre); + free_extent_map(split_mid); + free_extent_map(split_post); + + return ret; +} + static blk_status_t extract_ordered_extent(struct btrfs_inode *inode, struct bio *bio, loff_t file_offset) { struct btrfs_ordered_extent *ordered; - struct extent_map *em = NULL, *em_new = NULL; - struct extent_map_tree *em_tree = &inode->extent_tree; u64 start = (u64)bio->bi_iter.bi_sector << SECTOR_SHIFT; + u64 file_len; u64 len = bio->bi_iter.bi_size; u64 end = start + len; u64 ordered_end; @@ -2317,41 +2431,16 @@ static blk_status_t extract_ordered_extent(struct btrfs_inode *inode, goto out; } + file_len = ordered->num_bytes; pre = start - ordered->disk_bytenr; post = ordered_end - end; ret = btrfs_split_ordered_extent(ordered, pre, post); if (ret) goto out; - - read_lock(&em_tree->lock); - em = lookup_extent_mapping(em_tree, ordered->file_offset, len); - if (!em) { - read_unlock(&em_tree->lock); - ret = -EIO; - goto out; - } - read_unlock(&em_tree->lock); - - ASSERT(!test_bit(EXTENT_FLAG_COMPRESSED, &em->flags)); - /* - * We cannot reuse em_new here but have to create a new one, as - * unpin_extent_cache() expects the start of the extent map to be the - * logical offset of the file, which does not hold true anymore after - * splitting. - */ - em_new = create_io_em(inode, em->start + pre, len, - em->start + pre, em->block_start + pre, len, - len, len, BTRFS_COMPRESS_NONE, - BTRFS_ORDERED_REGULAR); - if (IS_ERR(em_new)) { - ret = PTR_ERR(em_new); - goto out; - } - free_extent_map(em_new); + ret = split_zoned_em(inode, file_offset, file_len, pre, post); out: - free_extent_map(em); btrfs_put_ordered_extent(ordered); return errno_to_blk_status(ret); From 9cc0b837e14ae913581ec1ea6e979a738f71b0fd Mon Sep 17 00:00:00 2001 From: Johannes Thumshirn Date: Tue, 6 Jul 2021 01:32:38 +0900 Subject: [PATCH 147/714] btrfs: don't block if we can't acquire the reclaim lock If we can't acquire the reclaim_bgs_lock on block group reclaim, we block until it is free. This can potentially stall for a long time. While reclaim of block groups is necessary for a good user experience on a zoned file system, there still is no need to block as it is best effort only, just like when we're deleting unused block groups. CC: stable@vger.kernel.org # 5.13 Signed-off-by: Johannes Thumshirn Reviewed-by: David Sterba Signed-off-by: David Sterba --- fs/btrfs/block-group.c | 10 +++++++++- 1 file changed, 9 insertions(+), 1 deletion(-) diff --git a/fs/btrfs/block-group.c b/fs/btrfs/block-group.c index c557327b454582..9e7d9d0c763dd1 100644 --- a/fs/btrfs/block-group.c +++ b/fs/btrfs/block-group.c @@ -1498,7 +1498,15 @@ void btrfs_reclaim_bgs_work(struct work_struct *work) if (!btrfs_exclop_start(fs_info, BTRFS_EXCLOP_BALANCE)) return; - mutex_lock(&fs_info->reclaim_bgs_lock); + /* + * Long running balances can keep us blocked here for eternity, so + * simply skip reclaim if we're unable to get the mutex. + */ + if (!mutex_trylock(&fs_info->reclaim_bgs_lock)) { + btrfs_exclop_finish(fs_info); + return; + } + spin_lock(&fs_info->unused_bgs_lock); while (!list_empty(&fs_info->reclaim_bgs)) { u64 zone_unusable; From ea32af47f00a046a1f953370514d6d946efe0152 Mon Sep 17 00:00:00 2001 From: Filipe Manana Date: Wed, 7 Jul 2021 12:23:45 +0100 Subject: [PATCH 148/714] btrfs: zoned: fix wrong mutex unlock on failure to allocate log root tree When syncing the log, if we fail to allocate the root node for the log root tree: 1) We are unlocking fs_info->tree_log_mutex, but at this point we have not yet locked this mutex; 2) We have locked fs_info->tree_root->log_mutex, but we end up not unlocking it; So fix this by unlocking fs_info->tree_root->log_mutex instead of fs_info->tree_log_mutex. Fixes: e75f9fd194090e ("btrfs: zoned: move log tree node allocation out of log_root_tree->log_mutex") CC: stable@vger.kernel.org # 5.13+ Reviewed-by: Nikolay Borisov Reviewed-by: Johannes Thumshirn Signed-off-by: Filipe Manana Signed-off-by: David Sterba --- fs/btrfs/tree-log.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/btrfs/tree-log.c b/fs/btrfs/tree-log.c index cab451d19547ae..dc6eb088d73e34 100644 --- a/fs/btrfs/tree-log.c +++ b/fs/btrfs/tree-log.c @@ -3173,7 +3173,7 @@ int btrfs_sync_log(struct btrfs_trans_handle *trans, if (!log_root_tree->node) { ret = btrfs_alloc_log_tree_node(trans, log_root_tree); if (ret) { - mutex_unlock(&fs_info->tree_log_mutex); + mutex_unlock(&fs_info->tree_root->log_mutex); goto out; } } From 7999d2555c9f879d006ea8469d74db9cdb038af0 Mon Sep 17 00:00:00 2001 From: Alain Volmat Date: Wed, 7 Jul 2021 10:27:00 +0200 Subject: [PATCH 149/714] spi: stm32: fixes pm_runtime calls in probe/remove Add pm_runtime calls in probe/probe error path and remove in order to be consistent in all places in ordering and ensure that pm_runtime is disabled prior to resources used by the SPI controller. This patch also fixes the 2 following warnings on driver remove: WARNING: CPU: 0 PID: 743 at drivers/clk/clk.c:594 clk_core_disable_lock+0x18/0x24 WARNING: CPU: 0 PID: 743 at drivers/clk/clk.c:476 clk_unprepare+0x24/0x2c Fixes: 038ac869c9d2 ("spi: stm32: add runtime PM support") Signed-off-by: Amelie Delaunay Signed-off-by: Alain Volmat Link: https://lore.kernel.org/r/1625646426-5826-2-git-send-email-alain.volmat@foss.st.com Signed-off-by: Mark Brown --- drivers/spi/spi-stm32.c | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) diff --git a/drivers/spi/spi-stm32.c b/drivers/spi/spi-stm32.c index 65b37c8dc49f02..05618a618939c0 100644 --- a/drivers/spi/spi-stm32.c +++ b/drivers/spi/spi-stm32.c @@ -1928,6 +1928,7 @@ static int stm32_spi_probe(struct platform_device *pdev) master->can_dma = stm32_spi_can_dma; pm_runtime_set_active(&pdev->dev); + pm_runtime_get_noresume(&pdev->dev); pm_runtime_enable(&pdev->dev); ret = spi_register_master(master); @@ -1943,6 +1944,8 @@ static int stm32_spi_probe(struct platform_device *pdev) err_pm_disable: pm_runtime_disable(&pdev->dev); + pm_runtime_put_noidle(&pdev->dev); + pm_runtime_set_suspended(&pdev->dev); err_dma_release: if (spi->dma_tx) dma_release_channel(spi->dma_tx); @@ -1959,9 +1962,14 @@ static int stm32_spi_remove(struct platform_device *pdev) struct spi_master *master = platform_get_drvdata(pdev); struct stm32_spi *spi = spi_master_get_devdata(master); + pm_runtime_get_sync(&pdev->dev); + spi_unregister_master(master); spi->cfg->disable(spi); + pm_runtime_disable(&pdev->dev); + pm_runtime_put_noidle(&pdev->dev); + pm_runtime_set_suspended(&pdev->dev); if (master->dma_tx) dma_release_channel(master->dma_tx); if (master->dma_rx) @@ -1969,7 +1977,6 @@ static int stm32_spi_remove(struct platform_device *pdev) clk_disable_unprepare(spi->clk); - pm_runtime_disable(&pdev->dev); pinctrl_pm_select_sleep_state(&pdev->dev); From 5616e895ecc56db8ba959e53638031a21353e0e2 Mon Sep 17 00:00:00 2001 From: SanjayKumar Jeyakumar Date: Wed, 7 Jul 2021 10:59:14 +0530 Subject: [PATCH 150/714] tools/runqslower: Use __state instead of state Commit 2f064a59a11f ("sched: Change task_struct::state") renamed task->state to task->__state in task_struct. Fix runqslower to use the new name of the field. Fixes: 2f064a59a11f ("sched: Change task_struct::state") Signed-off-by: SanjayKumar Jeyakumar Signed-off-by: Andrii Nakryiko Acked-by: Yonghong Song Link: https://lore.kernel.org/bpf/20210707052914.21473-1-vjsanjay@gmail.com --- tools/bpf/runqslower/runqslower.bpf.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tools/bpf/runqslower/runqslower.bpf.c b/tools/bpf/runqslower/runqslower.bpf.c index 645530ca7e9858..ab9353f2fd46ab 100644 --- a/tools/bpf/runqslower/runqslower.bpf.c +++ b/tools/bpf/runqslower/runqslower.bpf.c @@ -74,7 +74,7 @@ int handle__sched_switch(u64 *ctx) u32 pid; /* ivcsw: treat like an enqueue event and store timestamp */ - if (prev->state == TASK_RUNNING) + if (prev->__state == TASK_RUNNING) trace_enqueue(prev); pid = next->pid; From 0d472c69c6a5e22cef9e5809e2f6d0ccd5934f4a Mon Sep 17 00:00:00 2001 From: YueHaibing Date: Wed, 7 Jul 2021 15:50:57 +0800 Subject: [PATCH 151/714] stmmac: dwmac-loongson: Fix unsigned comparison to zero plat->phy_interface is unsigned integer, so the condition can't be less than zero and the warning will never printed. Fixes: 30bba69d7db4 ("stmmac: pci: Add dwmac support for Loongson") Signed-off-by: YueHaibing Signed-off-by: David S. Miller --- drivers/net/ethernet/stmicro/stmmac/dwmac-loongson.c | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/drivers/net/ethernet/stmicro/stmmac/dwmac-loongson.c b/drivers/net/ethernet/stmicro/stmmac/dwmac-loongson.c index e108b0d2bd2888..4c9a37dd0d3ff0 100644 --- a/drivers/net/ethernet/stmicro/stmmac/dwmac-loongson.c +++ b/drivers/net/ethernet/stmicro/stmmac/dwmac-loongson.c @@ -49,9 +49,9 @@ static int loongson_dwmac_probe(struct pci_dev *pdev, const struct pci_device_id { struct plat_stmmacenet_data *plat; struct stmmac_resources res; - bool mdio = false; - int ret, i; struct device_node *np; + int ret, i, phy_mode; + bool mdio = false; np = dev_of_node(&pdev->dev); @@ -108,10 +108,11 @@ static int loongson_dwmac_probe(struct pci_dev *pdev, const struct pci_device_id if (plat->bus_id < 0) plat->bus_id = pci_dev_id(pdev); - plat->phy_interface = device_get_phy_mode(&pdev->dev); - if (plat->phy_interface < 0) + phy_mode = device_get_phy_mode(&pdev->dev); + if (phy_mode < 0) dev_err(&pdev->dev, "phy_mode not found\n"); + plat->phy_interface = phy_mode; plat->interface = PHY_INTERFACE_MODE_GMII; pci_set_master(pdev); From eca81f09145d765c21dd8fb1ba5d874ca255c32c Mon Sep 17 00:00:00 2001 From: YueHaibing Date: Wed, 7 Jul 2021 15:53:35 +0800 Subject: [PATCH 152/714] stmmac: platform: Fix signedness bug in stmmac_probe_config_dt() The "plat->phy_interface" variable is an enum and in this context GCC will treat it as an unsigned int so the error handling is never triggered. Fixes: b9f0b2f634c0 ("net: stmmac: platform: fix probe for ACPI devices") Signed-off-by: YueHaibing Signed-off-by: David S. Miller --- drivers/net/ethernet/stmicro/stmmac/stmmac_platform.c | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/drivers/net/ethernet/stmicro/stmmac/stmmac_platform.c b/drivers/net/ethernet/stmicro/stmmac/stmmac_platform.c index 072eff8079d030..5ca710844cc1eb 100644 --- a/drivers/net/ethernet/stmicro/stmmac/stmmac_platform.c +++ b/drivers/net/ethernet/stmicro/stmmac/stmmac_platform.c @@ -397,6 +397,7 @@ stmmac_probe_config_dt(struct platform_device *pdev, u8 *mac) struct device_node *np = pdev->dev.of_node; struct plat_stmmacenet_data *plat; struct stmmac_dma_cfg *dma_cfg; + int phy_mode; void *ret; int rc; @@ -412,10 +413,11 @@ stmmac_probe_config_dt(struct platform_device *pdev, u8 *mac) eth_zero_addr(mac); } - plat->phy_interface = device_get_phy_mode(&pdev->dev); - if (plat->phy_interface < 0) - return ERR_PTR(plat->phy_interface); + phy_mode = device_get_phy_mode(&pdev->dev); + if (phy_mode < 0) + return ERR_PTR(phy_mode); + plat->phy_interface = phy_mode; plat->interface = stmmac_of_get_mac_mode(np); if (plat->interface < 0) plat->interface = plat->phy_interface; From 24b671aad4eae423e1abf5b7f08d9a5235458b8d Mon Sep 17 00:00:00 2001 From: Hangbin Liu Date: Wed, 7 Jul 2021 16:15:29 +0800 Subject: [PATCH 153/714] selftests: icmp_redirect: remove from checking for IPv6 route get If the kernel doesn't enable option CONFIG_IPV6_SUBTREES, the RTA_SRC info will not be exported to userspace in rt6_fill_node(). And ip cmd will not print "from ::" to the route output. So remove this check. Fixes: ec8105352869 ("selftests: Add redirect tests") Signed-off-by: Hangbin Liu Reviewed-by: David Ahern Signed-off-by: David S. Miller --- tools/testing/selftests/net/icmp_redirect.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tools/testing/selftests/net/icmp_redirect.sh b/tools/testing/selftests/net/icmp_redirect.sh index c19ecc6a861413..3a111ac1edc369 100755 --- a/tools/testing/selftests/net/icmp_redirect.sh +++ b/tools/testing/selftests/net/icmp_redirect.sh @@ -315,7 +315,7 @@ check_exception() if [ "$with_redirect" = "yes" ]; then ip -netns h1 -6 ro get ${H1_VRF_ARG} ${H2_N2_IP6} | \ - grep -q "${H2_N2_IP6} from :: via ${R2_LLADDR} dev br0.*${mtu}" + grep -q "${H2_N2_IP6} .*via ${R2_LLADDR} dev br0.*${mtu}" elif [ -n "${mtu}" ]; then ip -netns h1 -6 ro get ${H1_VRF_ARG} ${H2_N2_IP6} | \ grep -q "${mtu}" From 0e02bf5de46ae30074a2e1a8194a422a84482a1a Mon Sep 17 00:00:00 2001 From: Hangbin Liu Date: Wed, 7 Jul 2021 16:15:30 +0800 Subject: [PATCH 154/714] selftests: icmp_redirect: IPv6 PMTU info should be cleared after redirect After redirecting, it's already a new path. So the old PMTU info should be cleared. The IPv6 test "mtu exception plus redirect" should only has redirect info without old PMTU. The IPv4 test can not be changed because of legacy. Fixes: ec8105352869 ("selftests: Add redirect tests") Signed-off-by: Hangbin Liu Reviewed-by: David Ahern Signed-off-by: David S. Miller --- tools/testing/selftests/net/icmp_redirect.sh | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/tools/testing/selftests/net/icmp_redirect.sh b/tools/testing/selftests/net/icmp_redirect.sh index 3a111ac1edc369..ecbf57f264ed93 100755 --- a/tools/testing/selftests/net/icmp_redirect.sh +++ b/tools/testing/selftests/net/icmp_redirect.sh @@ -313,9 +313,10 @@ check_exception() fi log_test $? 0 "IPv4: ${desc}" - if [ "$with_redirect" = "yes" ]; then + # No PMTU info for test "redirect" and "mtu exception plus redirect" + if [ "$with_redirect" = "yes" ] && [ "$desc" != "redirect exception plus mtu" ]; then ip -netns h1 -6 ro get ${H1_VRF_ARG} ${H2_N2_IP6} | \ - grep -q "${H2_N2_IP6} .*via ${R2_LLADDR} dev br0.*${mtu}" + grep -v "mtu" | grep -q "${H2_N2_IP6} .*via ${R2_LLADDR} dev br0" elif [ -n "${mtu}" ]; then ip -netns h1 -6 ro get ${H1_VRF_ARG} ${H2_N2_IP6} | \ grep -q "${mtu}" From 1d719254c139fb62fb8056fb496b6fd007e71550 Mon Sep 17 00:00:00 2001 From: Wei Li Date: Mon, 28 Jun 2021 11:04:09 +0800 Subject: [PATCH 155/714] tools: bpf: Fix error in 'make -C tools/ bpf_install' make[2]: *** No rule to make target 'install'. Stop. make[1]: *** [Makefile:122: runqslower_install] Error 2 make: *** [Makefile:116: bpf_install] Error 2 There is no rule for target 'install' in tools/bpf/runqslower/Makefile, and there is no need to install it, so just remove 'runqslower_install'. Fixes: 9c01546d26d2 ("tools/bpf: Add runqslower tool to tools/bpf") Signed-off-by: Wei Li Signed-off-by: Andrii Nakryiko Link: https://lore.kernel.org/bpf/20210628030409.3459095-1-liwei391@huawei.com --- tools/bpf/Makefile | 7 ++----- 1 file changed, 2 insertions(+), 5 deletions(-) diff --git a/tools/bpf/Makefile b/tools/bpf/Makefile index 39bb322707b4ba..b11cfc86a3d021 100644 --- a/tools/bpf/Makefile +++ b/tools/bpf/Makefile @@ -97,7 +97,7 @@ clean: bpftool_clean runqslower_clean resolve_btfids_clean $(Q)$(RM) -- $(OUTPUT)FEATURE-DUMP.bpf $(Q)$(RM) -r -- $(OUTPUT)feature -install: $(PROGS) bpftool_install runqslower_install +install: $(PROGS) bpftool_install $(call QUIET_INSTALL, bpf_jit_disasm) $(Q)$(INSTALL) -m 0755 -d $(DESTDIR)$(prefix)/bin $(Q)$(INSTALL) $(OUTPUT)bpf_jit_disasm $(DESTDIR)$(prefix)/bin/bpf_jit_disasm @@ -118,9 +118,6 @@ bpftool_clean: runqslower: $(call descend,runqslower) -runqslower_install: - $(call descend,runqslower,install) - runqslower_clean: $(call descend,runqslower,clean) @@ -131,5 +128,5 @@ resolve_btfids_clean: $(call descend,resolve_btfids,clean) .PHONY: all install clean bpftool bpftool_install bpftool_clean \ - runqslower runqslower_install runqslower_clean \ + runqslower runqslower_clean \ resolve_btfids resolve_btfids_clean From 271dbc31843244e5192f0f8a8be0da26995f944a Mon Sep 17 00:00:00 2001 From: Dan Carpenter Date: Wed, 7 Jul 2021 13:01:00 +0300 Subject: [PATCH 156/714] sock: unlock on error in sock_setsockopt() If copy_from_sockptr() then we need to unlock before returning. Fixes: d463126e23f1 ("net: sock: extend SO_TIMESTAMPING for PHC binding") Signed-off-by: Dan Carpenter Signed-off-by: David S. Miller --- net/core/sock.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/net/core/sock.c b/net/core/sock.c index 1c4b0468bc2c30..a3eea6e0b30a7d 100644 --- a/net/core/sock.c +++ b/net/core/sock.c @@ -1123,8 +1123,10 @@ int sock_setsockopt(struct socket *sock, int level, int optname, case SO_TIMESTAMPING_OLD: if (optlen == sizeof(timestamping)) { if (copy_from_sockptr(×tamping, optval, - sizeof(timestamping))) - return -EFAULT; + sizeof(timestamping))) { + ret = -EFAULT; + break; + } } else { memset(×tamping, 0, sizeof(timestamping)); timestamping.flags = val; From 92c4bed59bc0ef2a92b99ca1fe1c7107ffa3125c Mon Sep 17 00:00:00 2001 From: "Roy, UjjaL" Date: Wed, 7 Jul 2021 23:48:33 +0530 Subject: [PATCH 157/714] ipmr: Fix indentation issue Fixed indentation by removing extra spaces. Signed-off-by: Roy, UjjaL Signed-off-by: David S. Miller --- net/ipv4/ipmr.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/ipv4/ipmr.c b/net/ipv4/ipmr.c index 7b12a40dd465d7..2dda856ca26025 100644 --- a/net/ipv4/ipmr.c +++ b/net/ipv4/ipmr.c @@ -2119,7 +2119,7 @@ int ip_mr_input(struct sk_buff *skb) raw_rcv(mroute_sk, skb); return 0; } - } + } } /* already under rcu_read_lock() */ From 9615fe36b31d926f1c5107013b772dc226a6a7ca Mon Sep 17 00:00:00 2001 From: Florian Fainelli Date: Wed, 7 Jul 2021 21:10:51 -0700 Subject: [PATCH 158/714] skbuff: Fix build with SKB extensions disabled We will fail to build with CONFIG_SKB_EXTENSIONS disabled after 8550ff8d8c75 ("skbuff: Release nfct refcount on napi stolen or re-used skbs") since there is an unconditionally use of skb_ext_find() without an appropriate stub. Simply build the code conditionally and properly guard against both COFNIG_SKB_EXTENSIONS as well as CONFIG_NET_TC_SKB_EXT being disabled. Fixes: Fixes: 8550ff8d8c75 ("skbuff: Release nfct refcount on napi stolen or re-used skbs") Signed-off-by: Florian Fainelli Reviewed-by: Roi Dayan Reviewed-by: Eric Dumazet Signed-off-by: David S. Miller --- net/core/dev.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/net/core/dev.c b/net/core/dev.c index 177a5aec0b6bef..03c95a0867bb36 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -6010,7 +6010,7 @@ static void gro_list_prepare(const struct list_head *head, maclen); diffs |= skb_get_nfct(p) ^ skb_get_nfct(skb); - +#if IS_ENABLED(CONFIG_SKB_EXTENSIONS) && IS_ENABLED(CONFIG_NET_TC_SKB_EXT) if (!diffs) { struct tc_skb_ext *skb_ext = skb_ext_find(skb, TC_SKB_EXT); struct tc_skb_ext *p_ext = skb_ext_find(p, TC_SKB_EXT); @@ -6019,6 +6019,7 @@ static void gro_list_prepare(const struct list_head *head, if (!diffs && unlikely(skb_ext)) diffs |= p_ext->chain ^ skb_ext->chain; } +#endif NAPI_GRO_CB(p)->same_flow = !diffs; } From c90b4503ccf42d9d367e843c223df44aa550e82a Mon Sep 17 00:00:00 2001 From: Colin Xu Date: Wed, 7 Jul 2021 08:45:31 +0800 Subject: [PATCH 159/714] drm/i915/gvt: Clear d3_entered on elsp cmd submission. d3_entered flag is used to mark for vgpu_reset a previous power transition from D3->D0, typically for VM resume from S3, so that gvt could skip PPGTT invalidation in current vgpu_reset during resuming. In case S0ix exit, although there is D3->D0, guest driver continue to use vgpu as normal, with d3_entered set, until next shutdown/reboot or power transition. If a reboot follows a S0ix exit, device power state transite as: D0->D3->D0->D0(reboot), while system power state transites as: S0->S0 (reboot). There is no vgpu_reset until D0(reboot), thus d3_entered won't be cleared, the vgpu_reset will skip PPGTT invalidation however those PPGTT entries are no longer valid. Err appears like: gvt: vgpu 2: vfio_pin_pages failed for gfn 0xxxxx, ret -22 gvt: vgpu 2: fail: spt xxxx guest entry 0xxxxx type 2 gvt: vgpu 2: fail: shadow page xxxx guest entry 0xxxxx type 2. Give gvt a chance to clear d3_entered on elsp cmd submission so that the states before & after S0ix enter/exit are consistent. Fixes: ba25d977571e ("drm/i915/gvt: Do not destroy ppgtt_mm during vGPU D3->D0.") Reviewed-by: Zhenyu Wang Signed-off-by: Colin Xu Signed-off-by: Zhenyu Wang Link: http://patchwork.freedesktop.org/patch/msgid/20210707004531.4873-1-colin.xu@intel.com --- drivers/gpu/drm/i915/gvt/handlers.c | 15 +++++++++++++++ 1 file changed, 15 insertions(+) diff --git a/drivers/gpu/drm/i915/gvt/handlers.c b/drivers/gpu/drm/i915/gvt/handlers.c index dda320749c65c9..2358c92733b0d0 100644 --- a/drivers/gpu/drm/i915/gvt/handlers.c +++ b/drivers/gpu/drm/i915/gvt/handlers.c @@ -1977,6 +1977,21 @@ static int elsp_mmio_write(struct intel_vgpu *vgpu, unsigned int offset, if (drm_WARN_ON(&i915->drm, !engine)) return -EINVAL; + /* + * Due to d3_entered is used to indicate skipping PPGTT invalidation on + * vGPU reset, it's set on D0->D3 on PCI config write, and cleared after + * vGPU reset if in resuming. + * In S0ix exit, the device power state also transite from D3 to D0 as + * S3 resume, but no vGPU reset (triggered by QEMU devic model). After + * S0ix exit, all engines continue to work. However the d3_entered + * remains set which will break next vGPU reset logic (miss the expected + * PPGTT invalidation). + * Engines can only work in D0. Thus the 1st elsp write gives GVT a + * chance to clear d3_entered. + */ + if (vgpu->d3_entered) + vgpu->d3_entered = false; + execlist = &vgpu->submission.execlist[engine->id]; execlist->elsp_dwords.data[3 - execlist->elsp_dwords.index] = data; From 65e2e6c1c20104ed19060a38f4edbf14e9f9a9a5 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Uwe=20Kleine-K=C3=B6nig?= Date: Thu, 1 Jul 2021 10:27:51 +0200 Subject: [PATCH 160/714] pwm: sprd: Ensure configuring period and duty_cycle isn't wrongly skipped MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit As the last call to sprd_pwm_apply() might have exited early if state->enabled was false, the values for period and duty_cycle stored in pwm->state might not have been written to hardware and it must be ensured that they are configured before enabling the PWM. Fixes: 8aae4b02e8a6 ("pwm: sprd: Add Spreadtrum PWM support") Signed-off-by: Uwe Kleine-König Signed-off-by: Thierry Reding --- drivers/pwm/pwm-sprd.c | 11 ++++------- 1 file changed, 4 insertions(+), 7 deletions(-) diff --git a/drivers/pwm/pwm-sprd.c b/drivers/pwm/pwm-sprd.c index f2a85e8dd94126..7004f55bbf1155 100644 --- a/drivers/pwm/pwm-sprd.c +++ b/drivers/pwm/pwm-sprd.c @@ -183,13 +183,10 @@ static int sprd_pwm_apply(struct pwm_chip *chip, struct pwm_device *pwm, } } - if (state->period != cstate->period || - state->duty_cycle != cstate->duty_cycle) { - ret = sprd_pwm_config(spc, pwm, state->duty_cycle, - state->period); - if (ret) - return ret; - } + ret = sprd_pwm_config(spc, pwm, state->duty_cycle, + state->period); + if (ret) + return ret; sprd_pwm_write(spc, pwm->hwpwm, SPRD_PWM_ENABLE, 1); } else if (cstate->enabled) { From fe8255f80b8617bc67a58f4ca2b54b3b9b32ff84 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Uwe=20Kleine-K=C3=B6nig?= Date: Thu, 1 Jul 2021 10:27:52 +0200 Subject: [PATCH 161/714] pwm: spear: Ensure configuring period and duty_cycle isn't wrongly skipped MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit As the last call to spear_pwm_apply() might have exited early if state->enabled was false, the values for period and duty_cycle stored in pwm->state might not have been written to hardware and it must be ensured that they are configured before enabling the PWM. Fixes: 98761ce4b91b ("pwm: spear: Implement .apply() callback") Signed-off-by: Uwe Kleine-König Signed-off-by: Thierry Reding --- drivers/pwm/pwm-spear.c | 9 +++------ 1 file changed, 3 insertions(+), 6 deletions(-) diff --git a/drivers/pwm/pwm-spear.c b/drivers/pwm/pwm-spear.c index 48c31dac2f32bf..54c7990967dd4e 100644 --- a/drivers/pwm/pwm-spear.c +++ b/drivers/pwm/pwm-spear.c @@ -177,12 +177,9 @@ static int spear_pwm_apply(struct pwm_chip *chip, struct pwm_device *pwm, return 0; } - if (state->period != pwm->state.period || - state->duty_cycle != pwm->state.duty_cycle) { - err = spear_pwm_config(chip, pwm, state->duty_cycle, state->period); - if (err) - return err; - } + err = spear_pwm_config(chip, pwm, state->duty_cycle, state->period); + if (err) + return err; if (!pwm->state.enabled) return spear_pwm_enable(chip, pwm); From 25f70b8f3d15b8c188ecb49c6007b86f3e775c20 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Uwe=20Kleine-K=C3=B6nig?= Date: Thu, 1 Jul 2021 10:27:53 +0200 Subject: [PATCH 162/714] pwm: tiecap: Ensure configuring period and duty_cycle isn't wrongly skipped MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit As the last call to ecap_pwm_apply() might have exited early if state->enabled was false, the values for period and duty_cycle stored in pwm->state might not have been written to hardware and it must be ensured that they are configured before enabling the PWM. Fixes: 0ca7acd84766 ("pwm: tiecap: Implement .apply() callback") Signed-off-by: Uwe Kleine-König Signed-off-by: Thierry Reding --- drivers/pwm/pwm-tiecap.c | 15 ++++++--------- 1 file changed, 6 insertions(+), 9 deletions(-) diff --git a/drivers/pwm/pwm-tiecap.c b/drivers/pwm/pwm-tiecap.c index dec3f1fb150c9a..35eb19a5a0d114 100644 --- a/drivers/pwm/pwm-tiecap.c +++ b/drivers/pwm/pwm-tiecap.c @@ -189,16 +189,13 @@ static int ecap_pwm_apply(struct pwm_chip *chip, struct pwm_device *pwm, return 0; } - if (state->period != pwm->state.period || - state->duty_cycle != pwm->state.duty_cycle) { - if (state->period > NSEC_PER_SEC) - return -ERANGE; + if (state->period > NSEC_PER_SEC) + return -ERANGE; - err = ecap_pwm_config(chip, pwm, state->duty_cycle, - state->period, enabled); - if (err) - return err; - } + err = ecap_pwm_config(chip, pwm, state->duty_cycle, + state->period, enabled); + if (err) + return err; if (!enabled) return ecap_pwm_enable(chip, pwm); From 7d6d4aaf2809f209c7d6bcab2f9ae76797051cfa Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Uwe=20Kleine-K=C3=B6nig?= Date: Thu, 1 Jul 2021 10:27:54 +0200 Subject: [PATCH 163/714] pwm: berlin: Ensure configuring period and duty_cycle isn't wrongly skipped MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit As the last call to berlin_pwm_apply() might have exited early if state->enabled was false, the values for period and duty_cycle stored in pwm->state might not have been written to hardware and it must be ensured that they are configured before enabling the PWM. Fixes: 30dffb42fcd4 ("pwm: berlin: Implement .apply() callback") Signed-off-by: Uwe Kleine-König Signed-off-by: Thierry Reding --- drivers/pwm/pwm-berlin.c | 9 +++------ 1 file changed, 3 insertions(+), 6 deletions(-) diff --git a/drivers/pwm/pwm-berlin.c b/drivers/pwm/pwm-berlin.c index 5537b5f6dd5db2..e157273fd2f70c 100644 --- a/drivers/pwm/pwm-berlin.c +++ b/drivers/pwm/pwm-berlin.c @@ -190,12 +190,9 @@ static int berlin_pwm_apply(struct pwm_chip *chip, struct pwm_device *pwm, return 0; } - if (state->period != pwm->state.period || - state->duty_cycle != pwm->state.duty_cycle) { - err = berlin_pwm_config(chip, pwm, state->duty_cycle, state->period); - if (err) - return err; - } + err = berlin_pwm_config(chip, pwm, state->duty_cycle, state->period); + if (err) + return err; if (!enabled) return berlin_pwm_enable(chip, pwm); From f4a8e31ed84ec646c158824f423cb22d1f362bbf Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Uwe=20Kleine-K=C3=B6nig?= Date: Thu, 1 Jul 2021 10:27:55 +0200 Subject: [PATCH 164/714] pwm: ep93xx: Ensure configuring period and duty_cycle isn't wrongly skipped MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit As the last call to ep93xx_pwm_apply() might have exited early if state->enabled was false, the values for period and duty_cycle stored in pwm->state might not have been written to hardware and it must be ensured that they are configured before enabling the PWM. Fixes: 6d45374af539 ("pwm: ep93xx: Implement .apply callback") Signed-off-by: Uwe Kleine-König Signed-off-by: Thierry Reding --- drivers/pwm/pwm-ep93xx.c | 85 +++++++++++++++++++--------------------- 1 file changed, 40 insertions(+), 45 deletions(-) diff --git a/drivers/pwm/pwm-ep93xx.c b/drivers/pwm/pwm-ep93xx.c index 8a3d781e6514c4..fc3cb7d669c6f3 100644 --- a/drivers/pwm/pwm-ep93xx.c +++ b/drivers/pwm/pwm-ep93xx.c @@ -64,6 +64,11 @@ static int ep93xx_pwm_apply(struct pwm_chip *chip, struct pwm_device *pwm, int ret; struct ep93xx_pwm *ep93xx_pwm = to_ep93xx_pwm(chip); bool enabled = state->enabled; + void __iomem *base = ep93xx_pwm->base; + unsigned long long c; + unsigned long period_cycles; + unsigned long duty_cycles; + unsigned long term; if (state->polarity != pwm->state.polarity) { if (enabled) { @@ -97,57 +102,47 @@ static int ep93xx_pwm_apply(struct pwm_chip *chip, struct pwm_device *pwm, return 0; } - if (state->period != pwm->state.period || - state->duty_cycle != pwm->state.duty_cycle) { - struct ep93xx_pwm *ep93xx_pwm = to_ep93xx_pwm(chip); - void __iomem *base = ep93xx_pwm->base; - unsigned long long c; - unsigned long period_cycles; - unsigned long duty_cycles; - unsigned long term; + /* + * The clock needs to be enabled to access the PWM registers. + * Configuration can be changed at any time. + */ + if (!pwm_is_enabled(pwm)) { + ret = clk_prepare_enable(ep93xx_pwm->clk); + if (ret) + return ret; + } - /* - * The clock needs to be enabled to access the PWM registers. - * Configuration can be changed at any time. - */ - if (!pwm_is_enabled(pwm)) { - ret = clk_prepare_enable(ep93xx_pwm->clk); - if (ret) - return ret; - } + c = clk_get_rate(ep93xx_pwm->clk); + c *= state->period; + do_div(c, 1000000000); + period_cycles = c; + + c = period_cycles; + c *= state->duty_cycle; + do_div(c, state->period); + duty_cycles = c; - c = clk_get_rate(ep93xx_pwm->clk); - c *= state->period; - do_div(c, 1000000000); - period_cycles = c; - - c = period_cycles; - c *= state->duty_cycle; - do_div(c, state->period); - duty_cycles = c; - - if (period_cycles < 0x10000 && duty_cycles < 0x10000) { - term = readw(base + EP93XX_PWMx_TERM_COUNT); - - /* Order is important if PWM is running */ - if (period_cycles > term) { - writew(period_cycles, base + EP93XX_PWMx_TERM_COUNT); - writew(duty_cycles, base + EP93XX_PWMx_DUTY_CYCLE); - } else { - writew(duty_cycles, base + EP93XX_PWMx_DUTY_CYCLE); - writew(period_cycles, base + EP93XX_PWMx_TERM_COUNT); - } - ret = 0; + if (period_cycles < 0x10000 && duty_cycles < 0x10000) { + term = readw(base + EP93XX_PWMx_TERM_COUNT); + + /* Order is important if PWM is running */ + if (period_cycles > term) { + writew(period_cycles, base + EP93XX_PWMx_TERM_COUNT); + writew(duty_cycles, base + EP93XX_PWMx_DUTY_CYCLE); } else { - ret = -EINVAL; + writew(duty_cycles, base + EP93XX_PWMx_DUTY_CYCLE); + writew(period_cycles, base + EP93XX_PWMx_TERM_COUNT); } + ret = 0; + } else { + ret = -EINVAL; + } - if (!pwm_is_enabled(pwm)) - clk_disable_unprepare(ep93xx_pwm->clk); + if (!pwm_is_enabled(pwm)) + clk_disable_unprepare(ep93xx_pwm->clk); - if (ret) - return ret; - } + if (ret) + return ret; if (!enabled) { ret = clk_prepare_enable(ep93xx_pwm->clk); From c7bb4b89033b764eb07db4e060548a6311d801ee Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Thu, 8 Jul 2021 00:21:09 -0700 Subject: [PATCH 165/714] ipv6: tcp: drop silly ICMPv6 packet too big messages MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit While TCP stack scales reasonably well, there is still one part that can be used to DDOS it. IPv6 Packet too big messages have to lookup/insert a new route, and if abused by attackers, can easily put hosts under high stress, with many cpus contending on a spinlock while one is stuck in fib6_run_gc() ip6_protocol_deliver_rcu() icmpv6_rcv() icmpv6_notify() tcp_v6_err() tcp_v6_mtu_reduced() inet6_csk_update_pmtu() ip6_rt_update_pmtu() __ip6_rt_update_pmtu() ip6_rt_cache_alloc() ip6_dst_alloc() dst_alloc() ip6_dst_gc() fib6_run_gc() spin_lock_bh() ... Some of our servers have been hit by malicious ICMPv6 packets trying to _increase_ the MTU/MSS of TCP flows. We believe these ICMPv6 packets are a result of a bug in one ISP stack, since they were blindly sent back for _every_ (small) packet sent to them. These packets are for one TCP flow: 09:24:36.266491 IP6 Addr1 > Victim ICMP6, packet too big, mtu 1460, length 1240 09:24:36.266509 IP6 Addr1 > Victim ICMP6, packet too big, mtu 1460, length 1240 09:24:36.316688 IP6 Addr1 > Victim ICMP6, packet too big, mtu 1460, length 1240 09:24:36.316704 IP6 Addr1 > Victim ICMP6, packet too big, mtu 1460, length 1240 09:24:36.608151 IP6 Addr1 > Victim ICMP6, packet too big, mtu 1460, length 1240 TCP stack can filter some silly requests : 1) MTU below IPV6_MIN_MTU can be filtered early in tcp_v6_err() 2) tcp_v6_mtu_reduced() can drop requests trying to increase current MSS. This tests happen before the IPv6 routing stack is entered, thus removing the potential contention and route exhaustion. Note that IPv6 stack was performing these checks, but too late (ie : after the route has been added, and after the potential garbage collect war) v2: fix typo caught by Martin, thanks ! v3: exports tcp_mtu_to_mss(), caught by David, thanks ! Fixes: 1da177e4c3f4 ("Linux-2.6.12-rc2") Signed-off-by: Eric Dumazet Reviewed-by: Maciej Żenczykowski Cc: Martin KaFai Lau Acked-by: Martin KaFai Lau Signed-off-by: David S. Miller --- net/ipv4/tcp_output.c | 1 + net/ipv6/tcp_ipv6.c | 19 +++++++++++++++++-- 2 files changed, 18 insertions(+), 2 deletions(-) diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c index bde781f46b41a5..29553fce850286 100644 --- a/net/ipv4/tcp_output.c +++ b/net/ipv4/tcp_output.c @@ -1732,6 +1732,7 @@ int tcp_mtu_to_mss(struct sock *sk, int pmtu) return __tcp_mtu_to_mss(sk, pmtu) - (tcp_sk(sk)->tcp_header_len - sizeof(struct tcphdr)); } +EXPORT_SYMBOL(tcp_mtu_to_mss); /* Inverse of above */ int tcp_mss_to_mtu(struct sock *sk, int mss) diff --git a/net/ipv6/tcp_ipv6.c b/net/ipv6/tcp_ipv6.c index 593c32fe57ed13..323989927a0a6a 100644 --- a/net/ipv6/tcp_ipv6.c +++ b/net/ipv6/tcp_ipv6.c @@ -348,11 +348,20 @@ static int tcp_v6_connect(struct sock *sk, struct sockaddr *uaddr, static void tcp_v6_mtu_reduced(struct sock *sk) { struct dst_entry *dst; + u32 mtu; if ((1 << sk->sk_state) & (TCPF_LISTEN | TCPF_CLOSE)) return; - dst = inet6_csk_update_pmtu(sk, READ_ONCE(tcp_sk(sk)->mtu_info)); + mtu = READ_ONCE(tcp_sk(sk)->mtu_info); + + /* Drop requests trying to increase our current mss. + * Check done in __ip6_rt_update_pmtu() is too late. + */ + if (tcp_mtu_to_mss(sk, mtu) >= tcp_sk(sk)->mss_cache) + return; + + dst = inet6_csk_update_pmtu(sk, mtu); if (!dst) return; @@ -433,6 +442,8 @@ static int tcp_v6_err(struct sk_buff *skb, struct inet6_skb_parm *opt, } if (type == ICMPV6_PKT_TOOBIG) { + u32 mtu = ntohl(info); + /* We are not interested in TCP_LISTEN and open_requests * (SYN-ACKs send out by Linux are always <576bytes so * they should go through unfragmented). @@ -443,7 +454,11 @@ static int tcp_v6_err(struct sk_buff *skb, struct inet6_skb_parm *opt, if (!ip6_sk_accept_pmtu(sk)) goto out; - WRITE_ONCE(tp->mtu_info, ntohl(info)); + if (mtu < IPV6_MIN_MTU) + goto out; + + WRITE_ONCE(tp->mtu_info, mtu); + if (!sock_owned_by_user(sk)) tcp_v6_mtu_reduced(sk); else if (!test_and_set_bit(TCP_MTU_REDUCED_DEFERRED, From debdd8e31895fdd1e2cfeb7a5aff1c83e49a91ba Mon Sep 17 00:00:00 2001 From: Jonathan Lemon Date: Thu, 8 Jul 2021 11:04:08 -0700 Subject: [PATCH 166/714] ptp: Relocate lookup cookie to correct block. An earlier commit set the pps_lookup cookie, but the line was somehow added to the wrong code block. Correct this. Fixes: 8602e40fc813 ("ptp: Set lookup cookie when creating a PTP PPS source.") Signed-off-by: Jonathan Lemon Signed-off-by: Dario Binacchi Acked-by: Richard Cochran Signed-off-by: David S. Miller --- drivers/ptp/ptp_clock.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/ptp/ptp_clock.c b/drivers/ptp/ptp_clock.c index ce6d9fc8560723..4dfc52e0670411 100644 --- a/drivers/ptp/ptp_clock.c +++ b/drivers/ptp/ptp_clock.c @@ -232,7 +232,6 @@ struct ptp_clock *ptp_clock_register(struct ptp_clock_info *info, pr_err("failed to create ptp aux_worker %d\n", err); goto kworker_err; } - ptp->pps_source->lookup_cookie = ptp; } /* PTP virtual clock is being registered under physical clock */ @@ -268,6 +267,7 @@ struct ptp_clock *ptp_clock_register(struct ptp_clock_info *info, pr_err("failed to register pps source\n"); goto no_pps; } + ptp->pps_source->lookup_cookie = ptp; } /* Initialize a new device of our class in our clock structure. */ From b9d233ea21f192702f8bbf3f5f640e2dde308b25 Mon Sep 17 00:00:00 2001 From: Gatis Peisenieks Date: Thu, 8 Jul 2021 12:49:04 +0300 Subject: [PATCH 167/714] atl1c: fix Mikrotik 10/25G NIC detection Since Mikrotik 10/25G NIC MDIO op emulation is not 100% reliable, on rare occasions it can happen that some physical functions of the NIC do not get initialized due to timeouted early MDIO op. This changes the atl1c probe on Mikrotik 10/25G NIC not to depend on MDIO op emulation. Signed-off-by: Gatis Peisenieks Signed-off-by: David S. Miller --- drivers/net/ethernet/atheros/atl1c/atl1c_hw.c | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/drivers/net/ethernet/atheros/atl1c/atl1c_hw.c b/drivers/net/ethernet/atheros/atl1c/atl1c_hw.c index 7dff20350865ab..f19370c3344462 100644 --- a/drivers/net/ethernet/atheros/atl1c/atl1c_hw.c +++ b/drivers/net/ethernet/atheros/atl1c/atl1c_hw.c @@ -594,6 +594,11 @@ int atl1c_phy_init(struct atl1c_hw *hw) int ret_val; u16 mii_bmcr_data = BMCR_RESET; + if (hw->nic_type == athr_mt) { + hw->phy_configured = true; + return 0; + } + if ((atl1c_read_phy_reg(hw, MII_PHYSID1, &hw->phy_id1) != 0) || (atl1c_read_phy_reg(hw, MII_PHYSID2, &hw->phy_id2) != 0)) { dev_err(&pdev->dev, "Error get phy ID\n"); From c34269041185dad1bab7a34f42ef9fab967a1684 Mon Sep 17 00:00:00 2001 From: Aaron Ma Date: Thu, 8 Jul 2021 21:17:10 +0800 Subject: [PATCH 168/714] mt76: mt7921: continue to probe driver when fw already downloaded When reboot system, no power cycles, firmware is already downloaded, return -EIO will break driver as error: mt7921e: probe of 0000:03:00.0 failed with error -5 Skip firmware download and continue to probe. Signed-off-by: Aaron Ma Fixes: 1c099ab44727c ("mt76: mt7921: add MCU support") Signed-off-by: David S. Miller --- drivers/net/wireless/mediatek/mt76/mt7921/mcu.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/net/wireless/mediatek/mt76/mt7921/mcu.c b/drivers/net/wireless/mediatek/mt76/mt7921/mcu.c index c2c4dc1968022f..cd690c64f65b98 100644 --- a/drivers/net/wireless/mediatek/mt76/mt7921/mcu.c +++ b/drivers/net/wireless/mediatek/mt76/mt7921/mcu.c @@ -931,7 +931,7 @@ static int mt7921_load_firmware(struct mt7921_dev *dev) ret = mt76_get_field(dev, MT_CONN_ON_MISC, MT_TOP_MISC2_FW_N9_RDY); if (ret) { dev_dbg(dev->mt76.dev, "Firmware is already download\n"); - return -EIO; + goto fw_loaded; } ret = mt7921_load_patch(dev); @@ -949,6 +949,7 @@ static int mt7921_load_firmware(struct mt7921_dev *dev) return -EIO; } +fw_loaded: mt76_queue_tx_cleanup(dev, dev->mt76.q_mcu[MT_MCUQ_FWDL], false); #ifdef CONFIG_PM From 015fe6fd29c4b9ac0f61b8c4455ef88e6018b9cc Mon Sep 17 00:00:00 2001 From: Shahjada Abul Husain Date: Thu, 8 Jul 2021 21:51:56 +0530 Subject: [PATCH 169/714] cxgb4: fix IRQ free race during driver unload IRQs are requested during driver's ndo_open() and then later freed up in disable_interrupts() during driver unload. A race exists where driver can set the CXGB4_FULL_INIT_DONE flag in ndo_open() after the disable_interrupts() in driver unload path checks it, and hence misses calling free_irq(). Fix by unregistering netdevice first and sync with driver's ndo_open(). This ensures disable_interrupts() checks the flag correctly and frees up the IRQs properly. Fixes: b37987e8db5f ("cxgb4: Disable interrupts and napi before unregistering netdev") Signed-off-by: Shahjada Abul Husain Signed-off-by: Raju Rangoju Signed-off-by: David S. Miller --- .../net/ethernet/chelsio/cxgb4/cxgb4_main.c | 18 ++++++++++-------- drivers/net/ethernet/chelsio/cxgb4/cxgb4_uld.c | 3 +++ 2 files changed, 13 insertions(+), 8 deletions(-) diff --git a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_main.c b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_main.c index 9a2b166d651e20..dbf9a0e6601d1d 100644 --- a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_main.c +++ b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_main.c @@ -2643,6 +2643,9 @@ static void detach_ulds(struct adapter *adap) { unsigned int i; + if (!is_uld(adap)) + return; + mutex_lock(&uld_mutex); list_del(&adap->list_node); @@ -7141,10 +7144,13 @@ static void remove_one(struct pci_dev *pdev) */ destroy_workqueue(adapter->workq); - if (is_uld(adapter)) { - detach_ulds(adapter); - t4_uld_clean_up(adapter); - } + detach_ulds(adapter); + + for_each_port(adapter, i) + if (adapter->port[i]->reg_state == NETREG_REGISTERED) + unregister_netdev(adapter->port[i]); + + t4_uld_clean_up(adapter); adap_free_hma_mem(adapter); @@ -7152,10 +7158,6 @@ static void remove_one(struct pci_dev *pdev) cxgb4_free_mps_ref_entries(adapter); - for_each_port(adapter, i) - if (adapter->port[i]->reg_state == NETREG_REGISTERED) - unregister_netdev(adapter->port[i]); - debugfs_remove_recursive(adapter->debugfs_root); if (!is_t4(adapter->params.chip)) diff --git a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_uld.c b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_uld.c index 743af9e654aa79..17faac715882d8 100644 --- a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_uld.c +++ b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_uld.c @@ -581,6 +581,9 @@ void t4_uld_clean_up(struct adapter *adap) { unsigned int i; + if (!is_uld(adap)) + return; + mutex_lock(&uld_mutex); for (i = 0; i < CXGB4_ULD_MAX; i++) { if (!adap->uld[i].handle) From 96248d6da65744e1baaa29e5c4e5dc233e29838b Mon Sep 17 00:00:00 2001 From: Randy Dunlap Date: Thu, 8 Jul 2021 10:33:10 -0700 Subject: [PATCH 170/714] net: microchip: sparx5: fix kconfig warning PHY_SPARX5_SERDES depends on OF so SPARX5_SWITCH should also depend on OF since 'select' does not follow any dependencies. WARNING: unmet direct dependencies detected for PHY_SPARX5_SERDES Depends on [n]: (ARCH_SPARX5 || COMPILE_TEST [=n]) && OF [=n] && HAS_IOMEM [=y] Selected by [y]: - SPARX5_SWITCH [=y] && NETDEVICES [=y] && ETHERNET [=y] && NET_VENDOR_MICROCHIP [=y] && NET_SWITCHDEV [=y] && HAS_IOMEM [=y] Fixes: 3cfa11bac9bb ("net: sparx5: add the basic sparx5 driver") Signed-off-by: Randy Dunlap Cc: Lars Povlsen Cc: Steen Hegelund Cc: UNGLinuxDriver@microchip.com Cc: linux-arm-kernel@lists.infradead.org Cc: "David S. Miller" Cc: Jakub Kicinski Cc: netdev@vger.kernel.org Signed-off-by: David S. Miller --- drivers/net/ethernet/microchip/sparx5/Kconfig | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/net/ethernet/microchip/sparx5/Kconfig b/drivers/net/ethernet/microchip/sparx5/Kconfig index a80419d8d4b5d3..ac403d43c74c07 100644 --- a/drivers/net/ethernet/microchip/sparx5/Kconfig +++ b/drivers/net/ethernet/microchip/sparx5/Kconfig @@ -2,6 +2,7 @@ config SPARX5_SWITCH tristate "Sparx5 switch driver" depends on NET_SWITCHDEV depends on HAS_IOMEM + depends on OF select PHYLINK select PHY_SPARX5_SERDES select RESET_CONTROLLER From 27fa107d3b8d13a57cdd7c7a40bd6548d4b9cef8 Mon Sep 17 00:00:00 2001 From: Ivan Mikhaylov Date: Thu, 8 Jul 2021 15:27:52 +0300 Subject: [PATCH 171/714] net/ncsi: fix restricted cast warning of sparse Sparse reports: net/ncsi/ncsi-rsp.c:406:24: warning: cast to restricted __be32 net/ncsi/ncsi-manage.c:732:33: warning: cast to restricted __be32 net/ncsi/ncsi-manage.c:756:25: warning: cast to restricted __be32 net/ncsi/ncsi-manage.c:779:25: warning: cast to restricted __be32 Signed-off-by: Ivan Mikhaylov Signed-off-by: David S. Miller --- net/ncsi/ncsi-manage.c | 6 +++--- net/ncsi/ncsi-rsp.c | 2 +- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/net/ncsi/ncsi-manage.c b/net/ncsi/ncsi-manage.c index ca04b6df134197..42b54a3da2e637 100644 --- a/net/ncsi/ncsi-manage.c +++ b/net/ncsi/ncsi-manage.c @@ -700,7 +700,7 @@ static int ncsi_oem_gma_handler_bcm(struct ncsi_cmd_arg *nca) nca->payload = NCSI_OEM_BCM_CMD_GMA_LEN; memset(data, 0, NCSI_OEM_BCM_CMD_GMA_LEN); - *(unsigned int *)data = ntohl(NCSI_OEM_MFR_BCM_ID); + *(unsigned int *)data = ntohl((__force __be32)NCSI_OEM_MFR_BCM_ID); data[5] = NCSI_OEM_BCM_CMD_GMA; nca->data = data; @@ -724,7 +724,7 @@ static int ncsi_oem_gma_handler_mlx(struct ncsi_cmd_arg *nca) nca->payload = NCSI_OEM_MLX_CMD_GMA_LEN; memset(&u, 0, sizeof(u)); - u.data_u32[0] = ntohl(NCSI_OEM_MFR_MLX_ID); + u.data_u32[0] = ntohl((__force __be32)NCSI_OEM_MFR_MLX_ID); u.data_u8[5] = NCSI_OEM_MLX_CMD_GMA; u.data_u8[6] = NCSI_OEM_MLX_CMD_GMA_PARAM; @@ -747,7 +747,7 @@ static int ncsi_oem_smaf_mlx(struct ncsi_cmd_arg *nca) int ret = 0; memset(&u, 0, sizeof(u)); - u.data_u32[0] = ntohl(NCSI_OEM_MFR_MLX_ID); + u.data_u32[0] = ntohl((__force __be32)NCSI_OEM_MFR_MLX_ID); u.data_u8[5] = NCSI_OEM_MLX_CMD_SMAF; u.data_u8[6] = NCSI_OEM_MLX_CMD_SMAF_PARAM; memcpy(&u.data_u8[MLX_SMAF_MAC_ADDR_OFFSET], diff --git a/net/ncsi/ncsi-rsp.c b/net/ncsi/ncsi-rsp.c index 888ccc2d4e34b1..04bc50be5c01d3 100644 --- a/net/ncsi/ncsi-rsp.c +++ b/net/ncsi/ncsi-rsp.c @@ -403,7 +403,7 @@ static int ncsi_rsp_handler_ev(struct ncsi_request *nr) /* Update to VLAN mode */ cmd = (struct ncsi_cmd_ev_pkt *)skb_network_header(nr->cmd); ncm->enable = 1; - ncm->data[0] = ntohl(cmd->mode); + ncm->data[0] = ntohl((__force __be32)cmd->mode); return 0; } From abd2fddc94a619b96bf41c60429d4c32bd118e17 Mon Sep 17 00:00:00 2001 From: Ivan Mikhaylov Date: Thu, 8 Jul 2021 15:27:53 +0300 Subject: [PATCH 172/714] net/ncsi: add NCSI Intel OEM command to keep PHY up This allows to keep PHY link up and prevents any channel resets during the host load. It is KEEP_PHY_LINK_UP option(Veto bit) in i210 datasheet which block PHY reset and power state changes. Signed-off-by: Ivan Mikhaylov Signed-off-by: David S. Miller --- net/ncsi/Kconfig | 6 ++++++ net/ncsi/internal.h | 5 +++++ net/ncsi/ncsi-manage.c | 45 ++++++++++++++++++++++++++++++++++++++++++ 3 files changed, 56 insertions(+) diff --git a/net/ncsi/Kconfig b/net/ncsi/Kconfig index 93309081f5a404..ea1dd32b6b1f62 100644 --- a/net/ncsi/Kconfig +++ b/net/ncsi/Kconfig @@ -17,3 +17,9 @@ config NCSI_OEM_CMD_GET_MAC help This allows to get MAC address from NCSI firmware and set them back to controller. +config NCSI_OEM_CMD_KEEP_PHY + bool "Keep PHY Link up" + depends on NET_NCSI + help + This allows to keep PHY link up and prevents any channel resets during + the host load. diff --git a/net/ncsi/internal.h b/net/ncsi/internal.h index cbbb0de4750a5a..0b6cfd3b31e0a5 100644 --- a/net/ncsi/internal.h +++ b/net/ncsi/internal.h @@ -78,6 +78,9 @@ enum { /* OEM Vendor Manufacture ID */ #define NCSI_OEM_MFR_MLX_ID 0x8119 #define NCSI_OEM_MFR_BCM_ID 0x113d +#define NCSI_OEM_MFR_INTEL_ID 0x157 +/* Intel specific OEM command */ +#define NCSI_OEM_INTEL_CMD_KEEP_PHY 0x20 /* CMD ID for Keep PHY up */ /* Broadcom specific OEM Command */ #define NCSI_OEM_BCM_CMD_GMA 0x01 /* CMD ID for Get MAC */ /* Mellanox specific OEM Command */ @@ -86,6 +89,7 @@ enum { #define NCSI_OEM_MLX_CMD_SMAF 0x01 /* CMD ID for Set MC Affinity */ #define NCSI_OEM_MLX_CMD_SMAF_PARAM 0x07 /* Parameter for SMAF */ /* OEM Command payload lengths*/ +#define NCSI_OEM_INTEL_CMD_KEEP_PHY_LEN 7 #define NCSI_OEM_BCM_CMD_GMA_LEN 12 #define NCSI_OEM_MLX_CMD_GMA_LEN 8 #define NCSI_OEM_MLX_CMD_SMAF_LEN 60 @@ -271,6 +275,7 @@ enum { ncsi_dev_state_probe_mlx_gma, ncsi_dev_state_probe_mlx_smaf, ncsi_dev_state_probe_cis, + ncsi_dev_state_probe_keep_phy, ncsi_dev_state_probe_gvi, ncsi_dev_state_probe_gc, ncsi_dev_state_probe_gls, diff --git a/net/ncsi/ncsi-manage.c b/net/ncsi/ncsi-manage.c index 42b54a3da2e637..89c7742cd72e23 100644 --- a/net/ncsi/ncsi-manage.c +++ b/net/ncsi/ncsi-manage.c @@ -689,6 +689,35 @@ static int set_one_vid(struct ncsi_dev_priv *ndp, struct ncsi_channel *nc, return 0; } +#if IS_ENABLED(CONFIG_NCSI_OEM_CMD_KEEP_PHY) + +static int ncsi_oem_keep_phy_intel(struct ncsi_cmd_arg *nca) +{ + unsigned char data[NCSI_OEM_INTEL_CMD_KEEP_PHY_LEN]; + int ret = 0; + + nca->payload = NCSI_OEM_INTEL_CMD_KEEP_PHY_LEN; + + memset(data, 0, NCSI_OEM_INTEL_CMD_KEEP_PHY_LEN); + *(unsigned int *)data = ntohl((__force __be32)NCSI_OEM_MFR_INTEL_ID); + + data[4] = NCSI_OEM_INTEL_CMD_KEEP_PHY; + + /* PHY Link up attribute */ + data[6] = 0x1; + + nca->data = data; + + ret = ncsi_xmit_cmd(nca); + if (ret) + netdev_err(nca->ndp->ndev.dev, + "NCSI: Failed to transmit cmd 0x%x during configure\n", + nca->type); + return ret; +} + +#endif + #if IS_ENABLED(CONFIG_NCSI_OEM_CMD_GET_MAC) /* NCSI OEM Command APIs */ @@ -1391,8 +1420,24 @@ static void ncsi_probe_channel(struct ncsi_dev_priv *ndp) goto error; } + nd->state = ncsi_dev_state_probe_gvi; + if (IS_ENABLED(CONFIG_NCSI_OEM_CMD_KEEP_PHY)) + nd->state = ncsi_dev_state_probe_keep_phy; + break; +#if IS_ENABLED(CONFIG_NCSI_OEM_CMD_KEEP_PHY) + case ncsi_dev_state_probe_keep_phy: + ndp->pending_req_num = 1; + + nca.type = NCSI_PKT_CMD_OEM; + nca.package = ndp->active_package->id; + nca.channel = 0; + ret = ncsi_oem_keep_phy_intel(&nca); + if (ret) + goto error; + nd->state = ncsi_dev_state_probe_gvi; break; +#endif /* CONFIG_NCSI_OEM_CMD_KEEP_PHY */ case ncsi_dev_state_probe_gvi: case ncsi_dev_state_probe_gc: case ncsi_dev_state_probe_gls: From 163f5de509a8ec193df94a9b9afbeb1a9e3f46a6 Mon Sep 17 00:00:00 2001 From: Ivan Mikhaylov Date: Thu, 8 Jul 2021 15:27:54 +0300 Subject: [PATCH 173/714] net/ncsi: add dummy response handler for Intel boards Add the dummy response handler for Intel boards to prevent incorrect handling of OEM commands. Signed-off-by: Ivan Mikhaylov Signed-off-by: David S. Miller --- net/ncsi/ncsi-rsp.c | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) diff --git a/net/ncsi/ncsi-rsp.c b/net/ncsi/ncsi-rsp.c index 04bc50be5c01d3..d483748948177b 100644 --- a/net/ncsi/ncsi-rsp.c +++ b/net/ncsi/ncsi-rsp.c @@ -699,12 +699,19 @@ static int ncsi_rsp_handler_oem_bcm(struct ncsi_request *nr) return 0; } +/* Response handler for Intel card */ +static int ncsi_rsp_handler_oem_intel(struct ncsi_request *nr) +{ + return 0; +} + static struct ncsi_rsp_oem_handler { unsigned int mfr_id; int (*handler)(struct ncsi_request *nr); } ncsi_rsp_oem_handlers[] = { { NCSI_OEM_MFR_MLX_ID, ncsi_rsp_handler_oem_mlx }, - { NCSI_OEM_MFR_BCM_ID, ncsi_rsp_handler_oem_bcm } + { NCSI_OEM_MFR_BCM_ID, ncsi_rsp_handler_oem_bcm }, + { NCSI_OEM_MFR_INTEL_ID, ncsi_rsp_handler_oem_intel } }; /* Response handler for OEM command */ From 2b452550a203d88112eaf0ba9fc4b750a000b496 Mon Sep 17 00:00:00 2001 From: Florian Fainelli Date: Thu, 8 Jul 2021 18:55:32 -0700 Subject: [PATCH 174/714] net: bcmgenet: Ensure all TX/RX queues DMAs are disabled Make sure that we disable each of the TX and RX queues in the TDMA and RDMA control registers. This is a correctness change to be symmetrical with the code that enables the TX and RX queues. Tested-by: Maxime Ripard Fixes: 1c1008c793fa ("net: bcmgenet: add main driver file") Signed-off-by: Florian Fainelli Signed-off-by: David S. Miller --- drivers/net/ethernet/broadcom/genet/bcmgenet.c | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/drivers/net/ethernet/broadcom/genet/bcmgenet.c b/drivers/net/ethernet/broadcom/genet/bcmgenet.c index 35e9956e930c18..db74241935ab41 100644 --- a/drivers/net/ethernet/broadcom/genet/bcmgenet.c +++ b/drivers/net/ethernet/broadcom/genet/bcmgenet.c @@ -3238,15 +3238,21 @@ static void bcmgenet_get_hw_addr(struct bcmgenet_priv *priv, /* Returns a reusable dma control register value */ static u32 bcmgenet_dma_disable(struct bcmgenet_priv *priv) { + unsigned int i; u32 reg; u32 dma_ctrl; /* disable DMA */ dma_ctrl = 1 << (DESC_INDEX + DMA_RING_BUF_EN_SHIFT) | DMA_EN; + for (i = 0; i < priv->hw_params->tx_queues; i++) + dma_ctrl |= (1 << (i + DMA_RING_BUF_EN_SHIFT)); reg = bcmgenet_tdma_readl(priv, DMA_CTRL); reg &= ~dma_ctrl; bcmgenet_tdma_writel(priv, reg, DMA_CTRL); + dma_ctrl = 1 << (DESC_INDEX + DMA_RING_BUF_EN_SHIFT) | DMA_EN; + for (i = 0; i < priv->hw_params->rx_queues; i++) + dma_ctrl |= (1 << (i + DMA_RING_BUF_EN_SHIFT)); reg = bcmgenet_rdma_readl(priv, DMA_CTRL); reg &= ~dma_ctrl; bcmgenet_rdma_writel(priv, reg, DMA_CTRL); From f263a81451c12da5a342d90572e317e611846f2c Mon Sep 17 00:00:00 2001 From: John Fastabend Date: Wed, 7 Jul 2021 15:38:47 -0700 Subject: [PATCH 175/714] bpf: Track subprog poke descriptors correctly and fix use-after-free Subprograms are calling map_poke_track(), but on program release there is no hook to call map_poke_untrack(). However, on program release, the aux memory (and poke descriptor table) is freed even though we still have a reference to it in the element list of the map aux data. When we run map_poke_run(), we then end up accessing free'd memory, triggering KASAN in prog_array_map_poke_run(): [...] [ 402.824689] BUG: KASAN: use-after-free in prog_array_map_poke_run+0xc2/0x34e [ 402.824698] Read of size 4 at addr ffff8881905a7940 by task hubble-fgs/4337 [ 402.824705] CPU: 1 PID: 4337 Comm: hubble-fgs Tainted: G I 5.12.0+ #399 [ 402.824715] Call Trace: [ 402.824719] dump_stack+0x93/0xc2 [ 402.824727] print_address_description.constprop.0+0x1a/0x140 [ 402.824736] ? prog_array_map_poke_run+0xc2/0x34e [ 402.824740] ? prog_array_map_poke_run+0xc2/0x34e [ 402.824744] kasan_report.cold+0x7c/0xd8 [ 402.824752] ? prog_array_map_poke_run+0xc2/0x34e [ 402.824757] prog_array_map_poke_run+0xc2/0x34e [ 402.824765] bpf_fd_array_map_update_elem+0x124/0x1a0 [...] The elements concerned are walked as follows: for (i = 0; i < elem->aux->size_poke_tab; i++) { poke = &elem->aux->poke_tab[i]; [...] The access to size_poke_tab is a 4 byte read, verified by checking offsets in the KASAN dump: [ 402.825004] The buggy address belongs to the object at ffff8881905a7800 which belongs to the cache kmalloc-1k of size 1024 [ 402.825008] The buggy address is located 320 bytes inside of 1024-byte region [ffff8881905a7800, ffff8881905a7c00) The pahole output of bpf_prog_aux: struct bpf_prog_aux { [...] /* --- cacheline 5 boundary (320 bytes) --- */ u32 size_poke_tab; /* 320 4 */ [...] In general, subprograms do not necessarily manage their own data structures. For example, BTF func_info and linfo are just pointers to the main program structure. This allows reference counting and cleanup to be done on the latter which simplifies their management a bit. The aux->poke_tab struct, however, did not follow this logic. The initial proposed fix for this use-after-free bug further embedded poke data tracking into the subprogram with proper reference counting. However, Daniel and Alexei questioned why we were treating these objects special; I agree, its unnecessary. The fix here removes the per subprogram poke table allocation and map tracking and instead simply points the aux->poke_tab pointer at the main programs poke table. This way, map tracking is simplified to the main program and we do not need to manage them per subprogram. This also means, bpf_prog_free_deferred(), which unwinds the program reference counting and kfrees objects, needs to ensure that we don't try to double free the poke_tab when free'ing the subprog structures. This is easily solved by NULL'ing the poke_tab pointer. The second detail is to ensure that per subprogram JIT logic only does fixups on poke_tab[] entries it owns. To do this, we add a pointer in the poke structure to point at the subprogram value so JITs can easily check while walking the poke_tab structure if the current entry belongs to the current program. The aux pointer is stable and therefore suitable for such comparison. On the jit_subprogs() error path, we omit cleaning up the poke->aux field because these are only ever referenced from the JIT side, but on error we will never make it to the JIT, so its fine to leave them dangling. Removing these pointers would complicate the error path for no reason. However, we do need to untrack all poke descriptors from the main program as otherwise they could race with the freeing of JIT memory from the subprograms. Lastly, a748c6975dea3 ("bpf: propagate poke descriptors to subprograms") had an off-by-one on the subprogram instruction index range check as it was testing 'insn_idx >= subprog_start && insn_idx <= subprog_end'. However, subprog_end is the next subprogram's start instruction. Fixes: a748c6975dea3 ("bpf: propagate poke descriptors to subprograms") Signed-off-by: John Fastabend Signed-off-by: Alexei Starovoitov Co-developed-by: Daniel Borkmann Signed-off-by: Daniel Borkmann Link: https://lore.kernel.org/bpf/20210707223848.14580-2-john.fastabend@gmail.com --- arch/x86/net/bpf_jit_comp.c | 3 ++ include/linux/bpf.h | 1 + kernel/bpf/core.c | 8 ++++- kernel/bpf/verifier.c | 60 +++++++++++++------------------------ 4 files changed, 32 insertions(+), 40 deletions(-) diff --git a/arch/x86/net/bpf_jit_comp.c b/arch/x86/net/bpf_jit_comp.c index e835164189f160..4b951458c9fc99 100644 --- a/arch/x86/net/bpf_jit_comp.c +++ b/arch/x86/net/bpf_jit_comp.c @@ -570,6 +570,9 @@ static void bpf_tail_call_direct_fixup(struct bpf_prog *prog) for (i = 0; i < prog->aux->size_poke_tab; i++) { poke = &prog->aux->poke_tab[i]; + if (poke->aux && poke->aux != prog->aux) + continue; + WARN_ON_ONCE(READ_ONCE(poke->tailcall_target_stable)); if (poke->reason != BPF_POKE_REASON_TAIL_CALL) diff --git a/include/linux/bpf.h b/include/linux/bpf.h index f309fc1509f2cb..e8e2b0393ca934 100644 --- a/include/linux/bpf.h +++ b/include/linux/bpf.h @@ -780,6 +780,7 @@ struct bpf_jit_poke_descriptor { void *tailcall_target; void *tailcall_bypass; void *bypass_addr; + void *aux; union { struct { struct bpf_map *map; diff --git a/kernel/bpf/core.c b/kernel/bpf/core.c index 034ad93a1ad717..9b15774983738d 100644 --- a/kernel/bpf/core.c +++ b/kernel/bpf/core.c @@ -2236,8 +2236,14 @@ static void bpf_prog_free_deferred(struct work_struct *work) #endif if (aux->dst_trampoline) bpf_trampoline_put(aux->dst_trampoline); - for (i = 0; i < aux->func_cnt; i++) + for (i = 0; i < aux->func_cnt; i++) { + /* We can just unlink the subprog poke descriptor table as + * it was originally linked to the main program and is also + * released along with it. + */ + aux->func[i]->aux->poke_tab = NULL; bpf_jit_free(aux->func[i]); + } if (aux->func_cnt) { kfree(aux->func); bpf_prog_unlock_free(aux->prog); diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c index be38bb930bf1e0..42a4063de7cd24 100644 --- a/kernel/bpf/verifier.c +++ b/kernel/bpf/verifier.c @@ -12121,33 +12121,19 @@ static int jit_subprogs(struct bpf_verifier_env *env) goto out_free; func[i]->is_func = 1; func[i]->aux->func_idx = i; - /* the btf and func_info will be freed only at prog->aux */ + /* Below members will be freed only at prog->aux */ func[i]->aux->btf = prog->aux->btf; func[i]->aux->func_info = prog->aux->func_info; + func[i]->aux->poke_tab = prog->aux->poke_tab; + func[i]->aux->size_poke_tab = prog->aux->size_poke_tab; for (j = 0; j < prog->aux->size_poke_tab; j++) { - u32 insn_idx = prog->aux->poke_tab[j].insn_idx; - int ret; + struct bpf_jit_poke_descriptor *poke; - if (!(insn_idx >= subprog_start && - insn_idx <= subprog_end)) - continue; - - ret = bpf_jit_add_poke_descriptor(func[i], - &prog->aux->poke_tab[j]); - if (ret < 0) { - verbose(env, "adding tail call poke descriptor failed\n"); - goto out_free; - } - - func[i]->insnsi[insn_idx - subprog_start].imm = ret + 1; - - map_ptr = func[i]->aux->poke_tab[ret].tail_call.map; - ret = map_ptr->ops->map_poke_track(map_ptr, func[i]->aux); - if (ret < 0) { - verbose(env, "tracking tail call prog failed\n"); - goto out_free; - } + poke = &prog->aux->poke_tab[j]; + if (poke->insn_idx < subprog_end && + poke->insn_idx >= subprog_start) + poke->aux = func[i]->aux; } /* Use bpf_prog_F_tag to indicate functions in stack traces. @@ -12178,18 +12164,6 @@ static int jit_subprogs(struct bpf_verifier_env *env) cond_resched(); } - /* Untrack main program's aux structs so that during map_poke_run() - * we will not stumble upon the unfilled poke descriptors; each - * of the main program's poke descs got distributed across subprogs - * and got tracked onto map, so we are sure that none of them will - * be missed after the operation below - */ - for (i = 0; i < prog->aux->size_poke_tab; i++) { - map_ptr = prog->aux->poke_tab[i].tail_call.map; - - map_ptr->ops->map_poke_untrack(map_ptr, prog->aux); - } - /* at this point all bpf functions were successfully JITed * now populate all bpf_calls with correct addresses and * run last pass of JIT @@ -12267,14 +12241,22 @@ static int jit_subprogs(struct bpf_verifier_env *env) bpf_prog_jit_attempt_done(prog); return 0; out_free: + /* We failed JIT'ing, so at this point we need to unregister poke + * descriptors from subprogs, so that kernel is not attempting to + * patch it anymore as we're freeing the subprog JIT memory. + */ + for (i = 0; i < prog->aux->size_poke_tab; i++) { + map_ptr = prog->aux->poke_tab[i].tail_call.map; + map_ptr->ops->map_poke_untrack(map_ptr, prog->aux); + } + /* At this point we're guaranteed that poke descriptors are not + * live anymore. We can just unlink its descriptor table as it's + * released with the main prog. + */ for (i = 0; i < env->subprog_cnt; i++) { if (!func[i]) continue; - - for (j = 0; j < func[i]->aux->size_poke_tab; j++) { - map_ptr = func[i]->aux->poke_tab[j].tail_call.map; - map_ptr->ops->map_poke_untrack(map_ptr, func[i]->aux); - } + func[i]->aux->poke_tab = NULL; bpf_jit_free(func[i]); } kfree(func); From 1fb5ba29ad0835c5cbfc69a27f9c2733cb65726e Mon Sep 17 00:00:00 2001 From: John Fastabend Date: Wed, 7 Jul 2021 15:38:48 -0700 Subject: [PATCH 176/714] bpf: Selftest to verify mixing bpf2bpf calls and tailcalls with insn patch This adds some extra noise to the tailcall_bpf2bpf4 tests that will cause verify to patch insns. This then moves around subprog start/end insn index and poke descriptor insn index to ensure that verify and JIT will continue to track these correctly. If done correctly verifier should pass this program same as before and JIT should emit tail call logic. Signed-off-by: John Fastabend Signed-off-by: Alexei Starovoitov Signed-off-by: Daniel Borkmann Link: https://lore.kernel.org/bpf/20210707223848.14580-3-john.fastabend@gmail.com --- .../selftests/bpf/prog_tests/tailcalls.c | 36 +++++++++++++------ .../selftests/bpf/progs/tailcall_bpf2bpf4.c | 18 ++++++++++ 2 files changed, 44 insertions(+), 10 deletions(-) diff --git a/tools/testing/selftests/bpf/prog_tests/tailcalls.c b/tools/testing/selftests/bpf/prog_tests/tailcalls.c index ee27d68d2a1c6e..b5940e6ca67cb7 100644 --- a/tools/testing/selftests/bpf/prog_tests/tailcalls.c +++ b/tools/testing/selftests/bpf/prog_tests/tailcalls.c @@ -715,6 +715,8 @@ static void test_tailcall_bpf2bpf_3(void) bpf_object__close(obj); } +#include "tailcall_bpf2bpf4.skel.h" + /* test_tailcall_bpf2bpf_4 checks that tailcall counter is correctly preserved * across tailcalls combined with bpf2bpf calls. for making sure that tailcall * counter behaves correctly, bpf program will go through following flow: @@ -727,10 +729,15 @@ static void test_tailcall_bpf2bpf_3(void) * the loop begins. At the end of the test make sure that the global counter is * equal to 31, because tailcall counter includes the first two tailcalls * whereas global counter is incremented only on loop presented on flow above. + * + * The noise parameter is used to insert bpf_map_update calls into the logic + * to force verifier to patch instructions. This allows us to ensure jump + * logic remains correct with instruction movement. */ -static void test_tailcall_bpf2bpf_4(void) +static void test_tailcall_bpf2bpf_4(bool noise) { - int err, map_fd, prog_fd, main_fd, data_fd, i, val; + int err, map_fd, prog_fd, main_fd, data_fd, i; + struct tailcall_bpf2bpf4__bss val; struct bpf_map *prog_array, *data_map; struct bpf_program *prog; struct bpf_object *obj; @@ -774,11 +781,6 @@ static void test_tailcall_bpf2bpf_4(void) goto out; } - err = bpf_prog_test_run(main_fd, 1, &pkt_v4, sizeof(pkt_v4), 0, - &duration, &retval, NULL); - CHECK(err || retval != sizeof(pkt_v4) * 3, "tailcall", "err %d errno %d retval %d\n", - err, errno, retval); - data_map = bpf_object__find_map_by_name(obj, "tailcall.bss"); if (CHECK_FAIL(!data_map || !bpf_map__is_internal(data_map))) return; @@ -787,10 +789,22 @@ static void test_tailcall_bpf2bpf_4(void) if (CHECK_FAIL(map_fd < 0)) return; + i = 0; + val.noise = noise; + val.count = 0; + err = bpf_map_update_elem(data_fd, &i, &val, BPF_ANY); + if (CHECK_FAIL(err)) + goto out; + + err = bpf_prog_test_run(main_fd, 1, &pkt_v4, sizeof(pkt_v4), 0, + &duration, &retval, NULL); + CHECK(err || retval != sizeof(pkt_v4) * 3, "tailcall", "err %d errno %d retval %d\n", + err, errno, retval); + i = 0; err = bpf_map_lookup_elem(data_fd, &i, &val); - CHECK(err || val != 31, "tailcall count", "err %d errno %d count %d\n", - err, errno, val); + CHECK(err || val.count != 31, "tailcall count", "err %d errno %d count %d\n", + err, errno, val.count); out: bpf_object__close(obj); @@ -815,5 +829,7 @@ void test_tailcalls(void) if (test__start_subtest("tailcall_bpf2bpf_3")) test_tailcall_bpf2bpf_3(); if (test__start_subtest("tailcall_bpf2bpf_4")) - test_tailcall_bpf2bpf_4(); + test_tailcall_bpf2bpf_4(false); + if (test__start_subtest("tailcall_bpf2bpf_5")) + test_tailcall_bpf2bpf_4(true); } diff --git a/tools/testing/selftests/bpf/progs/tailcall_bpf2bpf4.c b/tools/testing/selftests/bpf/progs/tailcall_bpf2bpf4.c index 77df6d4db89564..e89368a50b9795 100644 --- a/tools/testing/selftests/bpf/progs/tailcall_bpf2bpf4.c +++ b/tools/testing/selftests/bpf/progs/tailcall_bpf2bpf4.c @@ -2,6 +2,13 @@ #include #include +struct { + __uint(type, BPF_MAP_TYPE_ARRAY); + __uint(max_entries, 1); + __uint(key_size, sizeof(__u32)); + __uint(value_size, sizeof(__u32)); +} nop_table SEC(".maps"); + struct { __uint(type, BPF_MAP_TYPE_PROG_ARRAY); __uint(max_entries, 3); @@ -10,10 +17,21 @@ struct { } jmp_table SEC(".maps"); int count = 0; +int noise = 0; + +__always_inline int subprog_noise(void) +{ + __u32 key = 0; + + bpf_map_lookup_elem(&nop_table, &key); + return 0; +} __noinline int subprog_tail_2(struct __sk_buff *skb) { + if (noise) + subprog_noise(); bpf_tail_call_static(skb, &jmp_table, 2); return skb->len * 3; } From 3efe180d5105d367ae1dfadb97892ab93a89a783 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Christian=20K=C3=B6nig?= Date: Tue, 6 Jul 2021 08:51:25 +0200 Subject: [PATCH 177/714] drm/qxl: add NULL check for bo->resource MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit When allocations fails that can be NULL now. Signed-off-by: Christian König Reported-by: Daniel Bristot de Oliveira Tested-by: Daniel Bristot de Oliveira Tested-by: Roberto Sassu Fixes: bfa3357ef9ab ("drm/ttm: allocate resource object instead of embedding it v2") Acked-by: Daniel Vetter Link: https://patchwork.freedesktop.org/patch/msgid/20210708114710.8186-1-christian.koenig@amd.com --- drivers/gpu/drm/qxl/qxl_ttm.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/qxl/qxl_ttm.c b/drivers/gpu/drm/qxl/qxl_ttm.c index 19fd39d9a00cb4..37a1b6a6ad6dce 100644 --- a/drivers/gpu/drm/qxl/qxl_ttm.c +++ b/drivers/gpu/drm/qxl/qxl_ttm.c @@ -127,7 +127,7 @@ static void qxl_bo_move_notify(struct ttm_buffer_object *bo, struct qxl_bo *qbo; struct qxl_device *qdev; - if (!qxl_ttm_bo_is_qxl_bo(bo)) + if (!qxl_ttm_bo_is_qxl_bo(bo) || !bo->resource) return; qbo = to_qxl_bo(bo); qdev = to_qxl(qbo->tbo.base.dev); From 34e0fc345ae728cd974d9ee09832abf62cf054c6 Mon Sep 17 00:00:00 2001 From: Thierry Reding Date: Fri, 9 Jul 2021 14:08:12 +0200 Subject: [PATCH 178/714] arm64: tegra: Enable audio IOMMU support on Tegra194 Add iommus and interconnects properties to the sound device tree node on Tegra194. This ensures that the correct SID is used for translation of physical to I/O virtual addresses and that the path to system memory is properly described, which in turn can impact the range of memory that the device can address. Fixes: c7289b1c8a4e ("arm64: tegra: Enable SMMU support on Tegra194") Reviewed-by: Jon Hunter Signed-off-by: Thierry Reding --- arch/arm64/boot/dts/nvidia/tegra194.dtsi | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/arch/arm64/boot/dts/nvidia/tegra194.dtsi b/arch/arm64/boot/dts/nvidia/tegra194.dtsi index b7d53284139000..0e6eeb1a1c1ad1 100644 --- a/arch/arm64/boot/dts/nvidia/tegra194.dtsi +++ b/arch/arm64/boot/dts/nvidia/tegra194.dtsi @@ -2469,6 +2469,11 @@ * for 8x and 11.025x sample rate streams. */ assigned-clock-rates = <258000000>; + + interconnects = <&mc TEGRA194_MEMORY_CLIENT_APEDMAR &emc>, + <&mc TEGRA194_MEMORY_CLIENT_APEDMAW &emc>; + interconnect-names = "dma-mem", "write"; + iommus = <&smmu TEGRA194_SID_APE>; }; tcu: tcu { From c667dcd4dfcd515ad2c9b3953a33d742985a0b5e Mon Sep 17 00:00:00 2001 From: Thierry Reding Date: Thu, 8 Jul 2021 12:39:32 +0200 Subject: [PATCH 179/714] arm64: tegra: Enable SMMU support for USB on Tegra194 As of commit c7289b1c8a4e ("arm64: tegra: Enable SMMU support on Tegra194"), SMMU support is enabled system-wide on Tegra194. However, there was a bit of overlap between the SMMU enablement and the USB support addition, so the USB device tree nodes are missing the iommus and interconnects properties. This in turn leads to SMMU faults for these devices, since by default the ARM SMMU will fault. Add the iommus and interconnects properties to the XUSB and XUDC device tree nodes to restore their functionality. Fixes: c7289b1c8a4e ("arm64: tegra: Enable SMMU support on Tegra194") Reviewed-by: Jon Hunter Signed-off-by: Thierry Reding --- arch/arm64/boot/dts/nvidia/tegra194.dtsi | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/arch/arm64/boot/dts/nvidia/tegra194.dtsi b/arch/arm64/boot/dts/nvidia/tegra194.dtsi index 0e6eeb1a1c1ad1..076d5efc4c3d81 100644 --- a/arch/arm64/boot/dts/nvidia/tegra194.dtsi +++ b/arch/arm64/boot/dts/nvidia/tegra194.dtsi @@ -948,6 +948,10 @@ <&bpmp TEGRA194_CLK_XUSB_SS>, <&bpmp TEGRA194_CLK_XUSB_FS>; clock-names = "dev", "ss", "ss_src", "fs_src"; + interconnects = <&mc TEGRA194_MEMORY_CLIENT_XUSB_DEVR &emc>, + <&mc TEGRA194_MEMORY_CLIENT_XUSB_DEVW &emc>; + interconnect-names = "dma-mem", "write"; + iommus = <&smmu TEGRA194_SID_XUSB_DEV>; power-domains = <&bpmp TEGRA194_POWER_DOMAIN_XUSBB>, <&bpmp TEGRA194_POWER_DOMAIN_XUSBA>; power-domain-names = "dev", "ss"; @@ -977,6 +981,10 @@ "xusb_ss", "xusb_ss_src", "xusb_hs_src", "xusb_fs_src", "pll_u_480m", "clk_m", "pll_e"; + interconnects = <&mc TEGRA194_MEMORY_CLIENT_XUSB_HOSTR &emc>, + <&mc TEGRA194_MEMORY_CLIENT_XUSB_HOSTW &emc>; + interconnect-names = "dma-mem", "write"; + iommus = <&smmu TEGRA194_SID_XUSB_HOST>; power-domains = <&bpmp TEGRA194_POWER_DOMAIN_XUSBC>, <&bpmp TEGRA194_POWER_DOMAIN_XUSBA>; From c78eaeebe855fd93f2e77142ffd0404a54070d84 Mon Sep 17 00:00:00 2001 From: Pavel Skripkin Date: Fri, 9 Jul 2021 17:09:53 +0300 Subject: [PATCH 180/714] net: moxa: fix UAF in moxart_mac_probe In case of netdev registration failure the code path will jump to init_fail label: init_fail: netdev_err(ndev, "init failed\n"); moxart_mac_free_memory(ndev); irq_map_fail: free_netdev(ndev); return ret; So, there is no need to call free_netdev() before jumping to error handling path, since it can cause UAF or double-free bug. Fixes: 6c821bd9edc9 ("net: Add MOXA ART SoCs ethernet driver") Signed-off-by: Pavel Skripkin Signed-off-by: David S. Miller --- drivers/net/ethernet/moxa/moxart_ether.c | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/drivers/net/ethernet/moxa/moxart_ether.c b/drivers/net/ethernet/moxa/moxart_ether.c index 5249b64f4fc549..49def6934cad1b 100644 --- a/drivers/net/ethernet/moxa/moxart_ether.c +++ b/drivers/net/ethernet/moxa/moxart_ether.c @@ -540,10 +540,8 @@ static int moxart_mac_probe(struct platform_device *pdev) SET_NETDEV_DEV(ndev, &pdev->dev); ret = register_netdev(ndev); - if (ret) { - free_netdev(ndev); + if (ret) goto init_fail; - } netdev_dbg(ndev, "%s: IRQ=%d address=%pM\n", __func__, ndev->irq, ndev->dev_addr); From ad297cd2db8953e2202970e9504cab247b6c7cb4 Mon Sep 17 00:00:00 2001 From: Pavel Skripkin Date: Fri, 9 Jul 2021 17:24:18 +0300 Subject: [PATCH 181/714] net: qcom/emac: fix UAF in emac_remove adpt is netdev private data and it cannot be used after free_netdev() call. Using adpt after free_netdev() can cause UAF bug. Fix it by moving free_netdev() at the end of the function. Fixes: 54e19bc74f33 ("net: qcom/emac: do not use devm on internal phy pdev") Signed-off-by: Pavel Skripkin Signed-off-by: David S. Miller --- drivers/net/ethernet/qualcomm/emac/emac.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/net/ethernet/qualcomm/emac/emac.c b/drivers/net/ethernet/qualcomm/emac/emac.c index 8543bf3c348408..ad655f0a4965ce 100644 --- a/drivers/net/ethernet/qualcomm/emac/emac.c +++ b/drivers/net/ethernet/qualcomm/emac/emac.c @@ -735,12 +735,13 @@ static int emac_remove(struct platform_device *pdev) put_device(&adpt->phydev->mdio.dev); mdiobus_unregister(adpt->mii_bus); - free_netdev(netdev); if (adpt->phy.digital) iounmap(adpt->phy.digital); iounmap(adpt->phy.base); + free_netdev(netdev); + return 0; } From 0336f8ffece62f882ab3012820965a786a983f70 Mon Sep 17 00:00:00 2001 From: Pavel Skripkin Date: Fri, 9 Jul 2021 17:58:29 +0300 Subject: [PATCH 182/714] net: ti: fix UAF in tlan_remove_one priv is netdev private data and it cannot be used after free_netdev() call. Using priv after free_netdev() can cause UAF bug. Fix it by moving free_netdev() at the end of the function. Fixes: 1e0a8b13d355 ("tlan: cancel work at remove path") Signed-off-by: Pavel Skripkin Signed-off-by: David S. Miller --- drivers/net/ethernet/ti/tlan.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/drivers/net/ethernet/ti/tlan.c b/drivers/net/ethernet/ti/tlan.c index 0b2ce4bdc2c3d8..e0cb713193ea4b 100644 --- a/drivers/net/ethernet/ti/tlan.c +++ b/drivers/net/ethernet/ti/tlan.c @@ -313,9 +313,8 @@ static void tlan_remove_one(struct pci_dev *pdev) pci_release_regions(pdev); #endif - free_netdev(dev); - cancel_work_sync(&priv->tlan_tqueue); + free_netdev(dev); } static void tlan_start(struct net_device *dev) From 43b90bfad34bcb81b8a5bc7dc650800f4be1787e Mon Sep 17 00:00:00 2001 From: Alexander Ovechkin Date: Fri, 9 Jul 2021 18:28:23 +0300 Subject: [PATCH 183/714] net: send SYNACK packet with accepted fwmark commit e05a90ec9e16 ("net: reflect mark on tcp syn ack packets") fixed IPv4 only. This part is for the IPv6 side. Fixes: e05a90ec9e16 ("net: reflect mark on tcp syn ack packets") Signed-off-by: Alexander Ovechkin Acked-by: Dmitry Yakunin Reviewed-by: Eric Dumazet Signed-off-by: David S. Miller --- net/ipv6/tcp_ipv6.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/ipv6/tcp_ipv6.c b/net/ipv6/tcp_ipv6.c index 323989927a0a6a..0ce52d46e4f81b 100644 --- a/net/ipv6/tcp_ipv6.c +++ b/net/ipv6/tcp_ipv6.c @@ -555,7 +555,7 @@ static int tcp_v6_send_synack(const struct sock *sk, struct dst_entry *dst, opt = ireq->ipv6_opt; if (!opt) opt = rcu_dereference(np->opt); - err = ip6_xmit(sk, skb, fl6, sk->sk_mark, opt, + err = ip6_xmit(sk, skb, fl6, skb->mark ? : sk->sk_mark, opt, tclass, sk->sk_priority); rcu_read_unlock(); err = net_xmit_eval(err); From 358ed624207012f03318235017ac6fb41f8af592 Mon Sep 17 00:00:00 2001 From: Talal Ahmad Date: Fri, 9 Jul 2021 11:43:06 -0400 Subject: [PATCH 184/714] tcp: call sk_wmem_schedule before sk_mem_charge in zerocopy path sk_wmem_schedule makes sure that sk_forward_alloc has enough bytes for charging that is going to be done by sk_mem_charge. In the transmit zerocopy path, there is sk_mem_charge but there was no call to sk_wmem_schedule. This change adds that call. Without this call to sk_wmem_schedule, sk_forward_alloc can go negetive which is a bug because sk_forward_alloc is a per-socket space that has been forward charged so this can't be negative. Fixes: f214f915e7db ("tcp: enable MSG_ZEROCOPY") Signed-off-by: Talal Ahmad Reviewed-by: Willem de Bruijn Reviewed-by: Wei Wang Reviewed-by: Soheil Hassas Yeganeh Signed-off-by: Eric Dumazet Signed-off-by: David S. Miller --- net/ipv4/tcp.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c index d5ab5f2436408c..8cb44040ec68b5 100644 --- a/net/ipv4/tcp.c +++ b/net/ipv4/tcp.c @@ -1375,6 +1375,9 @@ int tcp_sendmsg_locked(struct sock *sk, struct msghdr *msg, size_t size) } pfrag->offset += copy; } else { + if (!sk_wmem_schedule(sk, copy)) + goto wait_for_space; + err = skb_zerocopy_iter_stream(sk, skb, msg, copy, uarg); if (err == -EMSGSIZE || err == -EEXIST) { tcp_mark_push(tp, skb); From 28b34f01a73435a754956ebae826e728c03ffa38 Mon Sep 17 00:00:00 2001 From: Antoine Tenart Date: Fri, 9 Jul 2021 18:16:09 +0200 Subject: [PATCH 185/714] net: do not reuse skbuff allocated from skbuff_fclone_cache in the skb cache Some socket buffers allocated in the fclone cache (in __alloc_skb) can end-up in the following path[1]: napi_skb_finish __kfree_skb_defer napi_skb_cache_put The issue is napi_skb_cache_put is not fclone friendly and will put those skbuff in the skb cache to be reused later, although this cache only expects skbuff allocated from skbuff_head_cache. When this happens the skbuff is eventually freed using the wrong origin cache, and we can see traces similar to: [ 1223.947534] cache_from_obj: Wrong slab cache. skbuff_head_cache but object is from skbuff_fclone_cache [ 1223.948895] WARNING: CPU: 3 PID: 0 at mm/slab.h:442 kmem_cache_free+0x251/0x3e0 [ 1223.950211] Modules linked in: [ 1223.950680] CPU: 3 PID: 0 Comm: swapper/3 Not tainted 5.13.0+ #474 [ 1223.951587] Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS 1.14.0-3.fc34 04/01/2014 [ 1223.953060] RIP: 0010:kmem_cache_free+0x251/0x3e0 Leading sometimes to other memory related issues. Fix this by using __kfree_skb for fclone skbuff, similar to what is done the other place __kfree_skb_defer is called. [1] At least in setups using veth pairs and tunnels. Building a kernel with KASAN we can for example see packets allocated in sk_stream_alloc_skb hit the above path and later the issue arises when the skbuff is reused. Fixes: 9243adfc311a ("skbuff: queue NAPI_MERGED_FREE skbs into NAPI cache instead of freeing") Cc: Alexander Lobakin Signed-off-by: Antoine Tenart Signed-off-by: David S. Miller --- net/core/dev.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/net/core/dev.c b/net/core/dev.c index 03c95a0867bb36..64b21f0a20483b 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -6234,6 +6234,8 @@ static gro_result_t napi_skb_finish(struct napi_struct *napi, case GRO_MERGED_FREE: if (NAPI_GRO_CB(skb)->free == NAPI_GRO_FREE_STOLEN_HEAD) napi_skb_free_stolen_head(skb); + else if (skb->fclone != SKB_FCLONE_UNAVAILABLE) + __kfree_skb(skb); else __kfree_skb_defer(skb); break; From c26d6586e97a69ef9b429cc577ca4c9d2d2ec7cd Mon Sep 17 00:00:00 2001 From: Zack Rusin Date: Wed, 9 Jun 2021 13:23:07 -0400 Subject: [PATCH 186/714] drm/vmwgfx: Fix implicit declaration error MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The declarations of ttm_range_man_init and ttm_range_man_fini have been moved to ttm_range_manager.h so we have to add it to the include list. Signed-off-by: Zack Rusin Reported-by: Randy Dunlap Fixes: 3eb7d96e9415 ("drm/ttm: flip over the range manager to self allocated nodes") Cc: Christian König Cc: Matthew Auld Reviewed-by: Martin Krastev Reviewed-by: Christian König Link: https://patchwork.freedesktop.org/patch/msgid/20210609172307.131929-10-zackr@vmware.com --- drivers/gpu/drm/vmwgfx/vmwgfx_drv.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_drv.c b/drivers/gpu/drm/vmwgfx/vmwgfx_drv.c index 6f5ea00973e003..45aeeca9b8f659 100644 --- a/drivers/gpu/drm/vmwgfx/vmwgfx_drv.c +++ b/drivers/gpu/drm/vmwgfx/vmwgfx_drv.c @@ -36,6 +36,7 @@ #include #include #include +#include #include #include From 34bd46bcf3de72cbffcdc42d3fa67e543d1c869b Mon Sep 17 00:00:00 2001 From: Zack Rusin Date: Tue, 15 Jun 2021 14:23:35 -0400 Subject: [PATCH 187/714] drm/vmwgfx: Fix a bad merge in otable batch takedown Change 2ef4fb92363c ("drm/vmwgfx: Make sure bo's are unpinned before putting them back") caused a conflict in one of the drm trees and the merge commit 68a32ba14177 ("Merge tag 'drm-next-2021-04-28' of git://anongit.freedesktop.org/drm/drm") accidently re-added code that the original change was removing. Fixed by removing the incorrect buffer unpin - it has already been unpinned two lines above. Fixes: 68a32ba14177 ("Merge tag 'drm-next-2021-04-28' of git://anongit.freedesktop.org/drm/drm") Signed-off-by: Zack Rusin Reviewed-by: Martin Krastev Link: https://patchwork.freedesktop.org/patch/msgid/20210615182336.995192-4-zackr@vmware.com --- drivers/gpu/drm/vmwgfx/vmwgfx_mob.c | 1 - 1 file changed, 1 deletion(-) diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_mob.c b/drivers/gpu/drm/vmwgfx/vmwgfx_mob.c index 5648664f71bc12..f2d62541545858 100644 --- a/drivers/gpu/drm/vmwgfx/vmwgfx_mob.c +++ b/drivers/gpu/drm/vmwgfx/vmwgfx_mob.c @@ -354,7 +354,6 @@ static void vmw_otable_batch_takedown(struct vmw_private *dev_priv, ttm_bo_unpin(bo); ttm_bo_unreserve(bo); - ttm_bo_unpin(batch->otable_bo); ttm_bo_put(batch->otable_bo); batch->otable_bo = NULL; } From 9992a078b1771da354ac1f9737e1e639b687caa2 Mon Sep 17 00:00:00 2001 From: Hangbin Liu Date: Fri, 9 Jul 2021 11:45:02 +0800 Subject: [PATCH 188/714] net: ip_tunnel: fix mtu calculation for ETHER tunnel devices Commit 28e104d00281 ("net: ip_tunnel: fix mtu calculation") removed dev->hard_header_len subtraction when calculate MTU for tunnel devices as there is an overhead for device that has header_ops. But there are ETHER tunnel devices, like gre_tap or erspan, which don't have header_ops but set dev->hard_header_len during setup. This makes pkts greater than (MTU - ETH_HLEN) could not be xmited. Fix it by subtracting the ETHER tunnel devices' dev->hard_header_len for MTU calculation. Fixes: 28e104d00281 ("net: ip_tunnel: fix mtu calculation") Reported-by: Jianlin Shi Signed-off-by: Hangbin Liu Signed-off-by: David S. Miller --- net/ipv4/ip_tunnel.c | 18 +++++++++++++++--- 1 file changed, 15 insertions(+), 3 deletions(-) diff --git a/net/ipv4/ip_tunnel.c b/net/ipv4/ip_tunnel.c index f6cc26de5ed304..0dca00745ac3ce 100644 --- a/net/ipv4/ip_tunnel.c +++ b/net/ipv4/ip_tunnel.c @@ -317,7 +317,7 @@ static int ip_tunnel_bind_dev(struct net_device *dev) } dev->needed_headroom = t_hlen + hlen; - mtu -= t_hlen; + mtu -= t_hlen + (dev->type == ARPHRD_ETHER ? dev->hard_header_len : 0); if (mtu < IPV4_MIN_MTU) mtu = IPV4_MIN_MTU; @@ -348,6 +348,9 @@ static struct ip_tunnel *ip_tunnel_create(struct net *net, t_hlen = nt->hlen + sizeof(struct iphdr); dev->min_mtu = ETH_MIN_MTU; dev->max_mtu = IP_MAX_MTU - t_hlen; + if (dev->type == ARPHRD_ETHER) + dev->max_mtu -= dev->hard_header_len; + ip_tunnel_add(itn, nt); return nt; @@ -489,11 +492,14 @@ static int tnl_update_pmtu(struct net_device *dev, struct sk_buff *skb, tunnel_hlen = md ? tunnel_hlen : tunnel->hlen; pkt_size = skb->len - tunnel_hlen; + pkt_size -= dev->type == ARPHRD_ETHER ? dev->hard_header_len : 0; - if (df) + if (df) { mtu = dst_mtu(&rt->dst) - (sizeof(struct iphdr) + tunnel_hlen); - else + mtu -= dev->type == ARPHRD_ETHER ? dev->hard_header_len : 0; + } else { mtu = skb_valid_dst(skb) ? dst_mtu(skb_dst(skb)) : dev->mtu; + } if (skb_valid_dst(skb)) skb_dst_update_pmtu_no_confirm(skb, mtu); @@ -972,6 +978,9 @@ int __ip_tunnel_change_mtu(struct net_device *dev, int new_mtu, bool strict) int t_hlen = tunnel->hlen + sizeof(struct iphdr); int max_mtu = IP_MAX_MTU - t_hlen; + if (dev->type == ARPHRD_ETHER) + max_mtu -= dev->hard_header_len; + if (new_mtu < ETH_MIN_MTU) return -EINVAL; @@ -1149,6 +1158,9 @@ int ip_tunnel_newlink(struct net_device *dev, struct nlattr *tb[], if (tb[IFLA_MTU]) { unsigned int max = IP_MAX_MTU - (nt->hlen + sizeof(struct iphdr)); + if (dev->type == ARPHRD_ETHER) + max -= dev->hard_header_len; + mtu = clamp(dev->mtu, (unsigned int)ETH_MIN_MTU, max); } From 67a9c94317402b826fc3db32afc8f39336803d97 Mon Sep 17 00:00:00 2001 From: Taehee Yoo Date: Fri, 9 Jul 2021 17:35:18 +0000 Subject: [PATCH 189/714] net: validate lwtstate->data before returning from skb_tunnel_info() skb_tunnel_info() returns pointer of lwtstate->data as ip_tunnel_info type without validation. lwtstate->data can have various types such as mpls_iptunnel_encap, etc and these are not compatible. So skb_tunnel_info() should validate before returning that pointer. Splat looks like: BUG: KASAN: slab-out-of-bounds in vxlan_get_route+0x418/0x4b0 [vxlan] Read of size 2 at addr ffff888106ec2698 by task ping/811 CPU: 1 PID: 811 Comm: ping Not tainted 5.13.0+ #1195 Call Trace: dump_stack_lvl+0x56/0x7b print_address_description.constprop.8.cold.13+0x13/0x2ee ? vxlan_get_route+0x418/0x4b0 [vxlan] ? vxlan_get_route+0x418/0x4b0 [vxlan] kasan_report.cold.14+0x83/0xdf ? vxlan_get_route+0x418/0x4b0 [vxlan] vxlan_get_route+0x418/0x4b0 [vxlan] [ ... ] vxlan_xmit_one+0x148b/0x32b0 [vxlan] [ ... ] vxlan_xmit+0x25c5/0x4780 [vxlan] [ ... ] dev_hard_start_xmit+0x1ae/0x6e0 __dev_queue_xmit+0x1f39/0x31a0 [ ... ] neigh_xmit+0x2f9/0x940 mpls_xmit+0x911/0x1600 [mpls_iptunnel] lwtunnel_xmit+0x18f/0x450 ip_finish_output2+0x867/0x2040 [ ... ] Fixes: 61adedf3e3f1 ("route: move lwtunnel state to dst_entry") Signed-off-by: Taehee Yoo Signed-off-by: David S. Miller --- include/net/dst_metadata.h | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/include/net/dst_metadata.h b/include/net/dst_metadata.h index 56cb3c38569a77..14efa0ded75dd9 100644 --- a/include/net/dst_metadata.h +++ b/include/net/dst_metadata.h @@ -45,7 +45,9 @@ skb_tunnel_info(const struct sk_buff *skb) return &md_dst->u.tun_info; dst = skb_dst(skb); - if (dst && dst->lwtstate) + if (dst && dst->lwtstate && + (dst->lwtstate->type == LWTUNNEL_ENCAP_IP || + dst->lwtstate->type == LWTUNNEL_ENCAP_IP6)) return lwt_tun_info(dst->lwtstate); return NULL; From 0c71929b5893e410e0efbe1bbeca6f19a5f19956 Mon Sep 17 00:00:00 2001 From: Jianguo Wu Date: Fri, 9 Jul 2021 17:20:46 -0700 Subject: [PATCH 190/714] mptcp: fix warning in __skb_flow_dissect() when do syn cookie for subflow join I did stress test with wrk[1] and webfsd[2] with the assistance of mptcp-tools[3]: Server side: ./use_mptcp.sh webfsd -4 -R /tmp/ -p 8099 Client side: ./use_mptcp.sh wrk -c 200 -d 30 -t 4 http://192.168.174.129:8099/ and got the following warning message: [ 55.552626] TCP: request_sock_subflow: Possible SYN flooding on port 8099. Sending cookies. Check SNMP counters. [ 55.553024] ------------[ cut here ]------------ [ 55.553027] WARNING: CPU: 0 PID: 10 at net/core/flow_dissector.c:984 __skb_flow_dissect+0x280/0x1650 ... [ 55.553117] CPU: 0 PID: 10 Comm: ksoftirqd/0 Not tainted 5.12.0+ #18 [ 55.553121] Hardware name: VMware, Inc. VMware Virtual Platform/440BX Desktop Reference Platform, BIOS 6.00 02/27/2020 [ 55.553124] RIP: 0010:__skb_flow_dissect+0x280/0x1650 ... [ 55.553133] RSP: 0018:ffffb79580087770 EFLAGS: 00010246 [ 55.553137] RAX: 0000000000000000 RBX: ffffffff8ddb58e0 RCX: ffffb79580087888 [ 55.553139] RDX: ffffffff8ddb58e0 RSI: ffff8f7e4652b600 RDI: 0000000000000000 [ 55.553141] RBP: ffffb79580087858 R08: 0000000000000000 R09: 0000000000000008 [ 55.553143] R10: 000000008c622965 R11: 00000000d3313a5b R12: ffff8f7e4652b600 [ 55.553146] R13: ffff8f7e465c9062 R14: 0000000000000000 R15: ffffb79580087888 [ 55.553149] FS: 0000000000000000(0000) GS:ffff8f7f75e00000(0000) knlGS:0000000000000000 [ 55.553152] CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 [ 55.553154] CR2: 00007f73d1d19000 CR3: 0000000135e10004 CR4: 00000000003706f0 [ 55.553160] Call Trace: [ 55.553166] ? __sha256_final+0x67/0xd0 [ 55.553173] ? sha256+0x7e/0xa0 [ 55.553177] __skb_get_hash+0x57/0x210 [ 55.553182] subflow_init_req_cookie_join_save+0xac/0xc0 [ 55.553189] subflow_check_req+0x474/0x550 [ 55.553195] ? ip_route_output_key_hash+0x67/0x90 [ 55.553200] ? xfrm_lookup_route+0x1d/0xa0 [ 55.553207] subflow_v4_route_req+0x8e/0xd0 [ 55.553212] tcp_conn_request+0x31e/0xab0 [ 55.553218] ? selinux_socket_sock_rcv_skb+0x116/0x210 [ 55.553224] ? tcp_rcv_state_process+0x179/0x6d0 [ 55.553229] tcp_rcv_state_process+0x179/0x6d0 [ 55.553235] tcp_v4_do_rcv+0xaf/0x220 [ 55.553239] tcp_v4_rcv+0xce4/0xd80 [ 55.553243] ? ip_route_input_rcu+0x246/0x260 [ 55.553248] ip_protocol_deliver_rcu+0x35/0x1b0 [ 55.553253] ip_local_deliver_finish+0x44/0x50 [ 55.553258] ip_local_deliver+0x6c/0x110 [ 55.553262] ? ip_rcv_finish_core.isra.19+0x5a/0x400 [ 55.553267] ip_rcv+0xd1/0xe0 ... After debugging, I found in __skb_flow_dissect(), skb->dev and skb->sk are both NULL, then net is NULL, and trigger WARN_ON_ONCE(!net), actually net is always NULL in this code path, as skb->dev is set to NULL in tcp_v4_rcv(), and skb->sk is never set. Code snippet in __skb_flow_dissect() that trigger warning: 975 if (skb) { 976 if (!net) { 977 if (skb->dev) 978 net = dev_net(skb->dev); 979 else if (skb->sk) 980 net = sock_net(skb->sk); 981 } 982 } 983 984 WARN_ON_ONCE(!net); So, using seq and transport header derived hash. [1] https://github.com/wg/wrk [2] https://github.com/ourway/webfsd [3] https://github.com/pabeni/mptcp-tools Fixes: 9466a1ccebbe ("mptcp: enable JOIN requests even if cookies are in use") Suggested-by: Paolo Abeni Suggested-by: Florian Westphal Signed-off-by: Jianguo Wu Signed-off-by: Mat Martineau Signed-off-by: David S. Miller --- net/mptcp/syncookies.c | 16 +++++++++++++++- 1 file changed, 15 insertions(+), 1 deletion(-) diff --git a/net/mptcp/syncookies.c b/net/mptcp/syncookies.c index abe0fd0997467c..37127781aee987 100644 --- a/net/mptcp/syncookies.c +++ b/net/mptcp/syncookies.c @@ -37,7 +37,21 @@ static spinlock_t join_entry_locks[COOKIE_JOIN_SLOTS] __cacheline_aligned_in_smp static u32 mptcp_join_entry_hash(struct sk_buff *skb, struct net *net) { - u32 i = skb_get_hash(skb) ^ net_hash_mix(net); + static u32 mptcp_join_hash_secret __read_mostly; + struct tcphdr *th = tcp_hdr(skb); + u32 seq, i; + + net_get_random_once(&mptcp_join_hash_secret, + sizeof(mptcp_join_hash_secret)); + + if (th->syn) + seq = TCP_SKB_CB(skb)->seq; + else + seq = TCP_SKB_CB(skb)->seq - 1; + + i = jhash_3words(seq, net_hash_mix(net), + (__force __u32)th->source << 16 | (__force __u32)th->dest, + mptcp_join_hash_secret); return i % ARRAY_SIZE(join_entries); } From 030d37bd1cd2443a1f21db47eb301899bfa45a2a Mon Sep 17 00:00:00 2001 From: Jianguo Wu Date: Fri, 9 Jul 2021 17:20:47 -0700 Subject: [PATCH 191/714] mptcp: remove redundant req destruct in subflow_check_req() In subflow_check_req(), if subflow sport is mismatch, will put msk, destroy token, and destruct req, then return -EPERM, which can be done by subflow_req_destructor() via: tcp_conn_request() |--__reqsk_free() |--subflow_req_destructor() So we should remove these redundant code, otherwise will call tcp_v4_reqsk_destructor() twice, and may double free inet_rsk(req)->ireq_opt. Fixes: 5bc56388c74f ("mptcp: add port number check for MP_JOIN") Signed-off-by: Jianguo Wu Signed-off-by: Mat Martineau Signed-off-by: David S. Miller --- net/mptcp/subflow.c | 5 ----- 1 file changed, 5 deletions(-) diff --git a/net/mptcp/subflow.c b/net/mptcp/subflow.c index 66d0b1893d2691..b15e2017168d62 100644 --- a/net/mptcp/subflow.c +++ b/net/mptcp/subflow.c @@ -214,11 +214,6 @@ static int subflow_check_req(struct request_sock *req, ntohs(inet_sk(sk_listener)->inet_sport), ntohs(inet_sk((struct sock *)subflow_req->msk)->inet_sport)); if (!mptcp_pm_sport_in_anno_list(subflow_req->msk, sk_listener)) { - sock_put((struct sock *)subflow_req->msk); - mptcp_token_destroy_request(req); - tcp_request_sock_ops.destructor(req); - subflow_req->msk = NULL; - subflow_req->mp_join = 0; SUBFLOW_REQ_INC_STATS(req, MPTCP_MIB_MISMATCHPORTSYNRX); return -EPERM; } From 8547ea5f52dd8ef19b69c25c41b1415481b3503b Mon Sep 17 00:00:00 2001 From: Jianguo Wu Date: Fri, 9 Jul 2021 17:20:48 -0700 Subject: [PATCH 192/714] mptcp: fix syncookie process if mptcp can not_accept new subflow Lots of "TCP: tcp_fin: Impossible, sk->sk_state=7" in client side when doing stress testing using wrk and webfsd. There are at least two cases may trigger this warning: 1.mptcp is in syncookie, and server recv MP_JOIN SYN request, in subflow_check_req(), the mptcp_can_accept_new_subflow() return false, so subflow_init_req_cookie_join_save() isn't called, i.e. not store the data present in the MP_JOIN syn request and the random nonce in hash table - join_entries[], but still send synack. When recv 3rd-ack, mptcp_token_join_cookie_init_state() will return false, and 3rd-ack is dropped, then if mptcp conn is closed by client, client will send a DATA_FIN and a MPTCP FIN, the DATA_FIN doesn't have MP_CAPABLE or MP_JOIN, so mptcp_subflow_init_cookie_req() will return 0, and pass the cookie check, MP_JOIN request is fallback to normal TCP. Server will send a TCP FIN if closed, in client side, when process TCP FIN, it will do reset, the code path is: tcp_data_queue()->mptcp_incoming_options() ->check_fully_established()->mptcp_subflow_reset(). mptcp_subflow_reset() will set sock state to TCP_CLOSE, so tcp_fin will hit TCP_CLOSE, and print the warning. 2.mptcp is in syncookie, and server recv 3rd-ack, in mptcp_subflow_init_cookie_req(), mptcp_can_accept_new_subflow() return false, and subflow_req->mp_join is not set to 1, so in subflow_syn_recv_sock() will not reset the MP_JOIN subflow, but fallback to normal TCP, and then the same thing happens when server will send a TCP FIN if closed. For case1, subflow_check_req() return -EPERM, then tcp_conn_request() will drop MP_JOIN SYN. For case2, let subflow_syn_recv_sock() call mptcp_can_accept_new_subflow(), and do fatal fallback, send reset. Fixes: 9466a1ccebbe ("mptcp: enable JOIN requests even if cookies are in use") Signed-off-by: Jianguo Wu Signed-off-by: Mat Martineau Signed-off-by: David S. Miller --- net/mptcp/subflow.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/net/mptcp/subflow.c b/net/mptcp/subflow.c index b15e2017168d62..966f777d35ce9d 100644 --- a/net/mptcp/subflow.c +++ b/net/mptcp/subflow.c @@ -225,6 +225,8 @@ static int subflow_check_req(struct request_sock *req, if (unlikely(req->syncookie)) { if (mptcp_can_accept_new_subflow(subflow_req->msk)) subflow_init_req_cookie_join_save(subflow_req, skb); + else + return -EPERM; } pr_debug("token=%u, remote_nonce=%u msk=%p", subflow_req->token, @@ -264,9 +266,7 @@ int mptcp_subflow_init_cookie_req(struct request_sock *req, if (!mptcp_token_join_cookie_init_state(subflow_req, skb)) return -EINVAL; - if (mptcp_can_accept_new_subflow(subflow_req->msk)) - subflow_req->mp_join = 1; - + subflow_req->mp_join = 1; subflow_req->ssn_offset = TCP_SKB_CB(skb)->seq - 1; } From 6787b7e350d3552651a3422d3d8980fbc8d65368 Mon Sep 17 00:00:00 2001 From: Jianguo Wu Date: Fri, 9 Jul 2021 17:20:49 -0700 Subject: [PATCH 193/714] mptcp: avoid processing packet if a subflow reset If check_fully_established() causes a subflow reset, it should not continue to process the packet in tcp_data_queue(). Add a return value to mptcp_incoming_options(), and return false if a subflow has been reset, else return true. Then drop the packet in tcp_data_queue()/tcp_rcv_state_process() if mptcp_incoming_options() return false. Fixes: d582484726c4 ("mptcp: fix fallback for MP_JOIN subflows") Signed-off-by: Jianguo Wu Signed-off-by: Mat Martineau Signed-off-by: David S. Miller --- include/net/mptcp.h | 5 +++-- net/ipv4/tcp_input.c | 19 +++++++++++++++---- net/mptcp/options.c | 19 +++++++++++++------ 3 files changed, 31 insertions(+), 12 deletions(-) diff --git a/include/net/mptcp.h b/include/net/mptcp.h index cb580b06152f88..8b5af683a818c3 100644 --- a/include/net/mptcp.h +++ b/include/net/mptcp.h @@ -105,7 +105,7 @@ bool mptcp_synack_options(const struct request_sock *req, unsigned int *size, bool mptcp_established_options(struct sock *sk, struct sk_buff *skb, unsigned int *size, unsigned int remaining, struct mptcp_out_options *opts); -void mptcp_incoming_options(struct sock *sk, struct sk_buff *skb); +bool mptcp_incoming_options(struct sock *sk, struct sk_buff *skb); void mptcp_write_options(__be32 *ptr, const struct tcp_sock *tp, struct mptcp_out_options *opts); @@ -227,9 +227,10 @@ static inline bool mptcp_established_options(struct sock *sk, return false; } -static inline void mptcp_incoming_options(struct sock *sk, +static inline bool mptcp_incoming_options(struct sock *sk, struct sk_buff *skb) { + return true; } static inline void mptcp_skb_ext_move(struct sk_buff *to, diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c index a5a8d0a378b23f..149ceb5c94ffcd 100644 --- a/net/ipv4/tcp_input.c +++ b/net/ipv4/tcp_input.c @@ -4247,6 +4247,9 @@ void tcp_reset(struct sock *sk, struct sk_buff *skb) { trace_tcp_receive_reset(sk); + /* mptcp can't tell us to ignore reset pkts, + * so just ignore the return value of mptcp_incoming_options(). + */ if (sk_is_mptcp(sk)) mptcp_incoming_options(sk, skb); @@ -4941,8 +4944,13 @@ static void tcp_data_queue(struct sock *sk, struct sk_buff *skb) bool fragstolen; int eaten; - if (sk_is_mptcp(sk)) - mptcp_incoming_options(sk, skb); + /* If a subflow has been reset, the packet should not continue + * to be processed, drop the packet. + */ + if (sk_is_mptcp(sk) && !mptcp_incoming_options(sk, skb)) { + __kfree_skb(skb); + return; + } if (TCP_SKB_CB(skb)->seq == TCP_SKB_CB(skb)->end_seq) { __kfree_skb(skb); @@ -6523,8 +6531,11 @@ int tcp_rcv_state_process(struct sock *sk, struct sk_buff *skb) case TCP_CLOSING: case TCP_LAST_ACK: if (!before(TCP_SKB_CB(skb)->seq, tp->rcv_nxt)) { - if (sk_is_mptcp(sk)) - mptcp_incoming_options(sk, skb); + /* If a subflow has been reset, the packet should not + * continue to be processed, drop the packet. + */ + if (sk_is_mptcp(sk) && !mptcp_incoming_options(sk, skb)) + goto discard; break; } fallthrough; diff --git a/net/mptcp/options.c b/net/mptcp/options.c index b5850afea34300..4452455aef7fa8 100644 --- a/net/mptcp/options.c +++ b/net/mptcp/options.c @@ -1035,7 +1035,8 @@ static bool add_addr_hmac_valid(struct mptcp_sock *msk, return hmac == mp_opt->ahmac; } -void mptcp_incoming_options(struct sock *sk, struct sk_buff *skb) +/* Return false if a subflow has been reset, else return true */ +bool mptcp_incoming_options(struct sock *sk, struct sk_buff *skb) { struct mptcp_subflow_context *subflow = mptcp_subflow_ctx(sk); struct mptcp_sock *msk = mptcp_sk(subflow->conn); @@ -1053,12 +1054,16 @@ void mptcp_incoming_options(struct sock *sk, struct sk_buff *skb) __mptcp_check_push(subflow->conn, sk); __mptcp_data_acked(subflow->conn); mptcp_data_unlock(subflow->conn); - return; + return true; } mptcp_get_options(sk, skb, &mp_opt); + + /* The subflow can be in close state only if check_fully_established() + * just sent a reset. If so, tell the caller to ignore the current packet. + */ if (!check_fully_established(msk, sk, subflow, skb, &mp_opt)) - return; + return sk->sk_state != TCP_CLOSE; if (mp_opt.fastclose && msk->local_key == mp_opt.rcvr_key) { @@ -1100,7 +1105,7 @@ void mptcp_incoming_options(struct sock *sk, struct sk_buff *skb) } if (!mp_opt.dss) - return; + return true; /* we can't wait for recvmsg() to update the ack_seq, otherwise * monodirectional flows will stuck @@ -1119,12 +1124,12 @@ void mptcp_incoming_options(struct sock *sk, struct sk_buff *skb) schedule_work(&msk->work)) sock_hold(subflow->conn); - return; + return true; } mpext = skb_ext_add(skb, SKB_EXT_MPTCP); if (!mpext) - return; + return true; memset(mpext, 0, sizeof(*mpext)); @@ -1153,6 +1158,8 @@ void mptcp_incoming_options(struct sock *sk, struct sk_buff *skb) if (mpext->csum_reqd) mpext->csum = mp_opt.csum; } + + return true; } static void mptcp_set_rwin(const struct tcp_sock *tp) From a7da441621c7945fbfd43ed239c93b8073cda502 Mon Sep 17 00:00:00 2001 From: Jianguo Wu Date: Fri, 9 Jul 2021 17:20:50 -0700 Subject: [PATCH 194/714] selftests: mptcp: fix case multiple subflows limited by server After patch "mptcp: fix syncookie process if mptcp can not_accept new subflow", if subflow is limited, MP_JOIN SYN is dropped, and no SYN/ACK will be replied. So in case "multiple subflows limited by server", the expected SYN/ACK number should be 1. Fixes: 00587187ad30 ("selftests: mptcp: add test cases for mptcp join tests with syn cookies") Reported-by: kernel test robot Signed-off-by: Jianguo Wu Signed-off-by: Mat Martineau Signed-off-by: David S. Miller --- tools/testing/selftests/net/mptcp/mptcp_join.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tools/testing/selftests/net/mptcp/mptcp_join.sh b/tools/testing/selftests/net/mptcp/mptcp_join.sh index 9a191c1a5de8d4..f02f4de2f3a088 100755 --- a/tools/testing/selftests/net/mptcp/mptcp_join.sh +++ b/tools/testing/selftests/net/mptcp/mptcp_join.sh @@ -1409,7 +1409,7 @@ syncookies_tests() ip netns exec $ns2 ./pm_nl_ctl add 10.0.3.2 flags subflow ip netns exec $ns2 ./pm_nl_ctl add 10.0.2.2 flags subflow run_tests $ns1 $ns2 10.0.1.1 - chk_join_nr "subflows limited by server w cookies" 2 2 1 + chk_join_nr "subflows limited by server w cookies" 2 1 1 # test signal address with cookies reset_with_cookies From ce599c516386f09ca30848a1a4eb93d3fffbe187 Mon Sep 17 00:00:00 2001 From: Paolo Abeni Date: Fri, 9 Jul 2021 17:20:51 -0700 Subject: [PATCH 195/714] mptcp: properly account bulk freed memory After commit 879526030c8b ("mptcp: protect the rx path with the msk socket spinlock") the rmem currently used by a given msk is really sk_rmem_alloc - rmem_released. The safety check in mptcp_data_ready() does not take the above in due account, as a result legit incoming data is kept in subflow receive queue with no reason, delaying or blocking MPTCP-level ack generation. This change addresses the issue introducing a new helper to fetch the rmem memory and using it as needed. Additionally add a MIB counter for the exceptional event described above - the peer is misbehaving. Finally, introduce the required annotation when rmem_released is updated. Fixes: 879526030c8b ("mptcp: protect the rx path with the msk socket spinlock") Closes: https://github.com/multipath-tcp/mptcp_net-next/issues/211 Signed-off-by: Paolo Abeni Signed-off-by: Mat Martineau Signed-off-by: David S. Miller --- net/mptcp/mib.c | 1 + net/mptcp/mib.h | 1 + net/mptcp/protocol.c | 12 +++++++----- net/mptcp/protocol.h | 10 +++++++++- 4 files changed, 18 insertions(+), 6 deletions(-) diff --git a/net/mptcp/mib.c b/net/mptcp/mib.c index 52ea2517e85602..ff2cc0e3273df1 100644 --- a/net/mptcp/mib.c +++ b/net/mptcp/mib.c @@ -44,6 +44,7 @@ static const struct snmp_mib mptcp_snmp_list[] = { SNMP_MIB_ITEM("RmSubflow", MPTCP_MIB_RMSUBFLOW), SNMP_MIB_ITEM("MPPrioTx", MPTCP_MIB_MPPRIOTX), SNMP_MIB_ITEM("MPPrioRx", MPTCP_MIB_MPPRIORX), + SNMP_MIB_ITEM("RcvPruned", MPTCP_MIB_RCVPRUNED), SNMP_MIB_SENTINEL }; diff --git a/net/mptcp/mib.h b/net/mptcp/mib.h index 193466c9b5494a..0663cb12b448bf 100644 --- a/net/mptcp/mib.h +++ b/net/mptcp/mib.h @@ -37,6 +37,7 @@ enum linux_mptcp_mib_field { MPTCP_MIB_RMSUBFLOW, /* Remove a subflow */ MPTCP_MIB_MPPRIOTX, /* Transmit a MP_PRIO */ MPTCP_MIB_MPPRIORX, /* Received a MP_PRIO */ + MPTCP_MIB_RCVPRUNED, /* Incoming packet dropped due to memory limit */ __MPTCP_MIB_MAX }; diff --git a/net/mptcp/protocol.c b/net/mptcp/protocol.c index 7a5afa8c686686..a8892494781522 100644 --- a/net/mptcp/protocol.c +++ b/net/mptcp/protocol.c @@ -474,7 +474,7 @@ static void mptcp_cleanup_rbuf(struct mptcp_sock *msk) bool cleanup, rx_empty; cleanup = (space > 0) && (space >= (old_space << 1)); - rx_empty = !atomic_read(&sk->sk_rmem_alloc); + rx_empty = !__mptcp_rmem(sk); mptcp_for_each_subflow(msk, subflow) { struct sock *ssk = mptcp_subflow_tcp_sock(subflow); @@ -720,8 +720,10 @@ void mptcp_data_ready(struct sock *sk, struct sock *ssk) sk_rbuf = ssk_rbuf; /* over limit? can't append more skbs to msk, Also, no need to wake-up*/ - if (atomic_read(&sk->sk_rmem_alloc) > sk_rbuf) + if (__mptcp_rmem(sk) > sk_rbuf) { + MPTCP_INC_STATS(sock_net(sk), MPTCP_MIB_RCVPRUNED); return; + } /* Wake-up the reader only for in-sequence data */ mptcp_data_lock(sk); @@ -1754,7 +1756,7 @@ static int __mptcp_recvmsg_mskq(struct mptcp_sock *msk, if (!(flags & MSG_PEEK)) { /* we will bulk release the skb memory later */ skb->destructor = NULL; - msk->rmem_released += skb->truesize; + WRITE_ONCE(msk->rmem_released, msk->rmem_released + skb->truesize); __skb_unlink(skb, &msk->receive_queue); __kfree_skb(skb); } @@ -1873,7 +1875,7 @@ static void __mptcp_update_rmem(struct sock *sk) atomic_sub(msk->rmem_released, &sk->sk_rmem_alloc); sk_mem_uncharge(sk, msk->rmem_released); - msk->rmem_released = 0; + WRITE_ONCE(msk->rmem_released, 0); } static void __mptcp_splice_receive_queue(struct sock *sk) @@ -2380,7 +2382,7 @@ static int __mptcp_init_sock(struct sock *sk) msk->out_of_order_queue = RB_ROOT; msk->first_pending = NULL; msk->wmem_reserved = 0; - msk->rmem_released = 0; + WRITE_ONCE(msk->rmem_released, 0); msk->tx_pending_data = 0; msk->first = NULL; diff --git a/net/mptcp/protocol.h b/net/mptcp/protocol.h index 426ed80fe72f74..0f0c026c5f8bb2 100644 --- a/net/mptcp/protocol.h +++ b/net/mptcp/protocol.h @@ -296,9 +296,17 @@ static inline struct mptcp_sock *mptcp_sk(const struct sock *sk) return (struct mptcp_sock *)sk; } +/* the msk socket don't use the backlog, also account for the bulk + * free memory + */ +static inline int __mptcp_rmem(const struct sock *sk) +{ + return atomic_read(&sk->sk_rmem_alloc) - READ_ONCE(mptcp_sk(sk)->rmem_released); +} + static inline int __mptcp_space(const struct sock *sk) { - return tcp_space(sk) + READ_ONCE(mptcp_sk(sk)->rmem_released); + return tcp_win_from_space(sk, READ_ONCE(sk->sk_rcvbuf) - __mptcp_rmem(sk)); } static inline struct mptcp_data_frag *mptcp_send_head(const struct sock *sk) From 222722bc6ebfabf5d54467070f05cf9c0a55ea8c Mon Sep 17 00:00:00 2001 From: Yunjian Wang Date: Sat, 10 Jul 2021 11:32:49 +0800 Subject: [PATCH 196/714] virtio_net: check virtqueue_add_sgs() return value As virtqueue_add_sgs() can fail, we should check the return value. Addresses-Coverity-ID: 1464439 ("Unchecked return value") Signed-off-by: Yunjian Wang Signed-off-by: David S. Miller --- drivers/net/virtio_net.c | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/drivers/net/virtio_net.c b/drivers/net/virtio_net.c index b0b81458ca94eb..13952e2dba5e97 100644 --- a/drivers/net/virtio_net.c +++ b/drivers/net/virtio_net.c @@ -1743,6 +1743,7 @@ static bool virtnet_send_command(struct virtnet_info *vi, u8 class, u8 cmd, { struct scatterlist *sgs[4], hdr, stat; unsigned out_num = 0, tmp; + int ret; /* Caller should know better */ BUG_ON(!virtio_has_feature(vi->vdev, VIRTIO_NET_F_CTRL_VQ)); @@ -1762,7 +1763,12 @@ static bool virtnet_send_command(struct virtnet_info *vi, u8 class, u8 cmd, sgs[out_num] = &stat; BUG_ON(out_num + 1 > ARRAY_SIZE(sgs)); - virtqueue_add_sgs(vi->cvq, sgs, out_num, 1, vi, GFP_ATOMIC); + ret = virtqueue_add_sgs(vi->cvq, sgs, out_num, 1, vi, GFP_ATOMIC); + if (ret < 0) { + dev_warn(&vi->vdev->dev, + "Failed to add sgs for command vq: %d\n.", ret); + return false; + } if (unlikely(!virtqueue_kick(vi->cvq))) return vi->ctrl->status == VIRTIO_NET_OK; From 84f7e0bb4809f4497124b6b6904c07c8a0c73c58 Mon Sep 17 00:00:00 2001 From: kernel test robot Date: Sun, 11 Jul 2021 18:12:56 +0200 Subject: [PATCH 197/714] dsa: fix for_each_child.cocci warnings For_each_available_child_of_node should have of_node_put() before return around line 423. Generated by: scripts/coccinelle/iterators/for_each_child.cocci CC: Alexander Lobakin Reported-by: kernel test robot Signed-off-by: kernel test robot Signed-off-by: Julia Lawall Signed-off-by: David S. Miller --- drivers/net/dsa/microchip/ksz_common.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/drivers/net/dsa/microchip/ksz_common.c b/drivers/net/dsa/microchip/ksz_common.c index a7e5ac60baef23..1542bfb8b5e54a 100644 --- a/drivers/net/dsa/microchip/ksz_common.c +++ b/drivers/net/dsa/microchip/ksz_common.c @@ -419,8 +419,10 @@ int ksz_switch_register(struct ksz_device *dev, if (of_property_read_u32(port, "reg", &port_num)) continue; - if (!(dev->port_mask & BIT(port_num))) + if (!(dev->port_mask & BIT(port_num))) { + of_node_put(port); return -EINVAL; + } of_get_phy_mode(port, &dev->ports[port_num].interface); } From a5de4be0aaaa66a2fa98e8a33bdbed3bd0682804 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Marek=20Beh=C3=BAn?= Date: Sun, 11 Jul 2021 18:38:15 +0200 Subject: [PATCH 198/714] net: phy: marvell10g: fix differentiation of 88X3310 from 88X3340 MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit It seems that we cannot differentiate 88X3310 from 88X3340 by simply looking at bit 3 of revision ID. This only works on revisions A0 and A1. On revision B0, this bit is always 1. Instead use the 3.d00d register for differentiation, since this register contains information about number of ports on the device. Fixes: 9885d016ffa9 ("net: phy: marvell10g: add separate structure for 88X3340") Signed-off-by: Marek Behún Reported-by: Matteo Croce Tested-by: Matteo Croce Signed-off-by: David S. Miller --- drivers/net/phy/marvell10g.c | 40 +++++++++++++++++++++++++++++++----- include/linux/marvell_phy.h | 6 +----- 2 files changed, 36 insertions(+), 10 deletions(-) diff --git a/drivers/net/phy/marvell10g.c b/drivers/net/phy/marvell10g.c index bbbc6ac8fa8257..53a433442803a3 100644 --- a/drivers/net/phy/marvell10g.c +++ b/drivers/net/phy/marvell10g.c @@ -78,6 +78,11 @@ enum { /* Temperature read register (88E2110 only) */ MV_PCS_TEMP = 0x8042, + /* Number of ports on the device */ + MV_PCS_PORT_INFO = 0xd00d, + MV_PCS_PORT_INFO_NPORTS_MASK = 0x0380, + MV_PCS_PORT_INFO_NPORTS_SHIFT = 7, + /* These registers appear at 0x800X and 0xa00X - the 0xa00X control * registers appear to set themselves to the 0x800X when AN is * restarted, but status registers appear readable from either. @@ -966,6 +971,30 @@ static const struct mv3310_chip mv2111_type = { #endif }; +static int mv3310_get_number_of_ports(struct phy_device *phydev) +{ + int ret; + + ret = phy_read_mmd(phydev, MDIO_MMD_PCS, MV_PCS_PORT_INFO); + if (ret < 0) + return ret; + + ret &= MV_PCS_PORT_INFO_NPORTS_MASK; + ret >>= MV_PCS_PORT_INFO_NPORTS_SHIFT; + + return ret + 1; +} + +static int mv3310_match_phy_device(struct phy_device *phydev) +{ + return mv3310_get_number_of_ports(phydev) == 1; +} + +static int mv3340_match_phy_device(struct phy_device *phydev) +{ + return mv3310_get_number_of_ports(phydev) == 4; +} + static int mv211x_match_phy_device(struct phy_device *phydev, bool has_5g) { int val; @@ -994,7 +1023,8 @@ static int mv2111_match_phy_device(struct phy_device *phydev) static struct phy_driver mv3310_drivers[] = { { .phy_id = MARVELL_PHY_ID_88X3310, - .phy_id_mask = MARVELL_PHY_ID_88X33X0_MASK, + .phy_id_mask = MARVELL_PHY_ID_MASK, + .match_phy_device = mv3310_match_phy_device, .name = "mv88x3310", .driver_data = &mv3310_type, .get_features = mv3310_get_features, @@ -1011,8 +1041,9 @@ static struct phy_driver mv3310_drivers[] = { .set_loopback = genphy_c45_loopback, }, { - .phy_id = MARVELL_PHY_ID_88X3340, - .phy_id_mask = MARVELL_PHY_ID_88X33X0_MASK, + .phy_id = MARVELL_PHY_ID_88X3310, + .phy_id_mask = MARVELL_PHY_ID_MASK, + .match_phy_device = mv3340_match_phy_device, .name = "mv88x3340", .driver_data = &mv3340_type, .get_features = mv3310_get_features, @@ -1069,8 +1100,7 @@ static struct phy_driver mv3310_drivers[] = { module_phy_driver(mv3310_drivers); static struct mdio_device_id __maybe_unused mv3310_tbl[] = { - { MARVELL_PHY_ID_88X3310, MARVELL_PHY_ID_88X33X0_MASK }, - { MARVELL_PHY_ID_88X3340, MARVELL_PHY_ID_88X33X0_MASK }, + { MARVELL_PHY_ID_88X3310, MARVELL_PHY_ID_MASK }, { MARVELL_PHY_ID_88E2110, MARVELL_PHY_ID_MASK }, { }, }; diff --git a/include/linux/marvell_phy.h b/include/linux/marvell_phy.h index acee44b9db269a..0f06c2287b5279 100644 --- a/include/linux/marvell_phy.h +++ b/include/linux/marvell_phy.h @@ -22,14 +22,10 @@ #define MARVELL_PHY_ID_88E1545 0x01410ea0 #define MARVELL_PHY_ID_88E1548P 0x01410ec0 #define MARVELL_PHY_ID_88E3016 0x01410e60 +#define MARVELL_PHY_ID_88X3310 0x002b09a0 #define MARVELL_PHY_ID_88E2110 0x002b09b0 #define MARVELL_PHY_ID_88X2222 0x01410f10 -/* PHY IDs and mask for Alaska 10G PHYs */ -#define MARVELL_PHY_ID_88X33X0_MASK 0xfffffff8 -#define MARVELL_PHY_ID_88X3310 0x002b09a0 -#define MARVELL_PHY_ID_88X3340 0x002b09a8 - /* Marvel 88E1111 in Finisar SFP module with modified PHY ID */ #define MARVELL_PHY_ID_88E1111_FINISAR 0x01ff0cc0 From 04bef83a3358946bfc98a5ecebd1b0003d83d882 Mon Sep 17 00:00:00 2001 From: Nikolay Aleksandrov Date: Sun, 11 Jul 2021 12:56:28 +0300 Subject: [PATCH 199/714] net: bridge: multicast: fix PIM hello router port marking race When a PIM hello packet is received on a bridge port with multicast snooping enabled, we mark it as a router port automatically, that includes adding that port the router port list. The multicast lock protects that list, but it is not acquired in the PIM message case leading to a race condition, we need to take it to fix the race. Cc: stable@vger.kernel.org Fixes: 91b02d3d133b ("bridge: mcast: add router port on PIM hello message") Signed-off-by: Nikolay Aleksandrov Signed-off-by: David S. Miller --- net/bridge/br_multicast.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/net/bridge/br_multicast.c b/net/bridge/br_multicast.c index 53c3a9d80d9c74..3bbbc6d7b7c38e 100644 --- a/net/bridge/br_multicast.c +++ b/net/bridge/br_multicast.c @@ -3264,7 +3264,9 @@ static void br_multicast_pim(struct net_bridge *br, pim_hdr_type(pimhdr) != PIM_TYPE_HELLO) return; + spin_lock(&br->multicast_lock); br_ip4_multicast_mark_router(br, port); + spin_unlock(&br->multicast_lock); } static int br_ip4_multicast_mrd_rcv(struct net_bridge *br, From 000b7287b67555fee39d39fff75229dedde0dcbf Mon Sep 17 00:00:00 2001 From: Nikolay Aleksandrov Date: Sun, 11 Jul 2021 12:56:29 +0300 Subject: [PATCH 200/714] net: bridge: multicast: fix MRD advertisement router port marking race When an MRD advertisement is received on a bridge port with multicast snooping enabled, we mark it as a router port automatically, that includes adding that port to the router port list. The multicast lock protects that list, but it is not acquired in the MRD advertisement case leading to a race condition, we need to take it to fix the race. Cc: stable@vger.kernel.org Cc: linus.luessing@c0d3.blue Fixes: 4b3087c7e37f ("bridge: Snoop Multicast Router Advertisements") Signed-off-by: Nikolay Aleksandrov Signed-off-by: David S. Miller --- net/bridge/br_multicast.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/net/bridge/br_multicast.c b/net/bridge/br_multicast.c index 3bbbc6d7b7c38e..d0434dc8c03b83 100644 --- a/net/bridge/br_multicast.c +++ b/net/bridge/br_multicast.c @@ -3277,7 +3277,9 @@ static int br_ip4_multicast_mrd_rcv(struct net_bridge *br, igmp_hdr(skb)->type != IGMP_MRDISC_ADV) return -ENOMSG; + spin_lock(&br->multicast_lock); br_ip4_multicast_mark_router(br, port); + spin_unlock(&br->multicast_lock); return 0; } @@ -3345,7 +3347,9 @@ static void br_ip6_multicast_mrd_rcv(struct net_bridge *br, if (icmp6_hdr(skb)->icmp6_type != ICMPV6_MRDISC_ADV) return; + spin_lock(&br->multicast_lock); br_ip6_multicast_mark_router(br, port); + spin_unlock(&br->multicast_lock); } static int br_multicast_ipv6_rcv(struct net_bridge *br, From 9c6882608bce249a8918744ecdb65748534e3f17 Mon Sep 17 00:00:00 2001 From: Pavel Begunkov Date: Sat, 10 Jul 2021 02:45:59 +0100 Subject: [PATCH 201/714] io_uring: use right task for exiting checks When we use delayed_work for fallback execution of requests, current will be not of the submitter task, and so checks in io_req_task_submit() may not behave as expected. Currently, it leaves inline completions not flushed, so making io_ring_exit_work() to hang. Use the submitter task for all those checks. Cc: stable@vger.kernel.org Signed-off-by: Pavel Begunkov Link: https://lore.kernel.org/r/cb413c715bed0bc9c98b169059ea9c8a2c770715.1625881431.git.asml.silence@gmail.com Signed-off-by: Jens Axboe --- fs/io_uring.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/io_uring.c b/fs/io_uring.c index d94fb5835a20e8..118215211bb2ba 100644 --- a/fs/io_uring.c +++ b/fs/io_uring.c @@ -2016,7 +2016,7 @@ static void io_req_task_submit(struct io_kiocb *req) /* ctx stays valid until unlock, even if we drop all ours ctx->refs */ mutex_lock(&ctx->uring_lock); - if (!(current->flags & PF_EXITING) && !current->in_execve) + if (!(req->task->flags & PF_EXITING) && !req->task->in_execve) __io_queue_sqe(req); else io_req_complete_failed(req, -EFAULT); From 1b48773f9fd09f311d1166ce1dd50652ebe05218 Mon Sep 17 00:00:00 2001 From: Pavel Begunkov Date: Sun, 11 Jul 2021 22:41:13 +0100 Subject: [PATCH 202/714] io_uring: fix io_drain_req() io_drain_req() return whether the request has been consumed or not, not an error code. Fix a stupid mistake slipped from optimisation patches. Reported-by: syzbot+ba6fcd859210f4e9e109@syzkaller.appspotmail.com Fixes: 76cc33d79175a ("io_uring: refactor io_req_defer()") Signed-off-by: Pavel Begunkov Link: https://lore.kernel.org/r/4d3c53c4274ffff307c8ae062fc7fda63b978df2.1626039606.git.asml.silence@gmail.com Signed-off-by: Jens Axboe --- fs/io_uring.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/fs/io_uring.c b/fs/io_uring.c index 118215211bb2ba..0cac361bf6b8de 100644 --- a/fs/io_uring.c +++ b/fs/io_uring.c @@ -6019,11 +6019,13 @@ static bool io_drain_req(struct io_kiocb *req) ret = io_req_prep_async(req); if (ret) - return ret; + goto fail; io_prep_async_link(req); de = kmalloc(sizeof(*de), GFP_KERNEL); if (!de) { - io_req_complete_failed(req, -ENOMEM); + ret = -ENOMEM; +fail: + io_req_complete_failed(req, ret); return true; } From 0dfc21c1a4cac321749a53c92da616d9546d00e3 Mon Sep 17 00:00:00 2001 From: Thierry Reding Date: Thu, 8 Jul 2021 12:34:31 +0200 Subject: [PATCH 203/714] ASoC: tegra: Use ADMAIF component for DMA allocations DMA memory is currently allocated for the soundcard device, which is a virtual device added for the sole purpose of "stitching" together the audio device. It is not a real device and therefore doesn't have a DMA mask or a description of the path to and from memory of accesses. Memory accesses really originate from the ADMA controller that provides the DMA channels used by the PCM component. However, since the DMA memory is allocated up-front and the DMA channels aren't known at that point, there is no way of knowing the DMA channel provider at allocation time. The next best physical device in the memory path is the ADMAIF. Use it as the device to allocate DMA memory to. iommus and interconnects device tree properties can thus be added to the ADMAIF device tree node to describe the memory access path for audio. Signed-off-by: Thierry Reding Link: https://lore.kernel.org/r/20210708103432.1690385-2-thierry.reding@gmail.com Signed-off-by: Mark Brown --- sound/soc/tegra/tegra_pcm.c | 30 ++++++++++++++++++------------ 1 file changed, 18 insertions(+), 12 deletions(-) diff --git a/sound/soc/tegra/tegra_pcm.c b/sound/soc/tegra/tegra_pcm.c index 573374b89b100a..d3276b4595affb 100644 --- a/sound/soc/tegra/tegra_pcm.c +++ b/sound/soc/tegra/tegra_pcm.c @@ -213,19 +213,19 @@ snd_pcm_uframes_t tegra_pcm_pointer(struct snd_soc_component *component, } EXPORT_SYMBOL_GPL(tegra_pcm_pointer); -static int tegra_pcm_preallocate_dma_buffer(struct snd_pcm *pcm, int stream, +static int tegra_pcm_preallocate_dma_buffer(struct device *dev, struct snd_pcm *pcm, int stream, size_t size) { struct snd_pcm_substream *substream = pcm->streams[stream].substream; struct snd_dma_buffer *buf = &substream->dma_buffer; - buf->area = dma_alloc_wc(pcm->card->dev, size, &buf->addr, GFP_KERNEL); + buf->area = dma_alloc_wc(dev, size, &buf->addr, GFP_KERNEL); if (!buf->area) return -ENOMEM; buf->private_data = NULL; buf->dev.type = SNDRV_DMA_TYPE_DEV; - buf->dev.dev = pcm->card->dev; + buf->dev.dev = dev; buf->bytes = size; return 0; @@ -244,31 +244,28 @@ static void tegra_pcm_deallocate_dma_buffer(struct snd_pcm *pcm, int stream) if (!buf->area) return; - dma_free_wc(pcm->card->dev, buf->bytes, buf->area, buf->addr); + dma_free_wc(buf->dev.dev, buf->bytes, buf->area, buf->addr); buf->area = NULL; } -static int tegra_pcm_dma_allocate(struct snd_soc_pcm_runtime *rtd, +static int tegra_pcm_dma_allocate(struct device *dev, struct snd_soc_pcm_runtime *rtd, size_t size) { - struct snd_card *card = rtd->card->snd_card; struct snd_pcm *pcm = rtd->pcm; int ret; - ret = dma_set_mask_and_coherent(card->dev, DMA_BIT_MASK(32)); + ret = dma_set_mask_and_coherent(dev, DMA_BIT_MASK(32)); if (ret < 0) return ret; if (pcm->streams[SNDRV_PCM_STREAM_PLAYBACK].substream) { - ret = tegra_pcm_preallocate_dma_buffer(pcm, - SNDRV_PCM_STREAM_PLAYBACK, size); + ret = tegra_pcm_preallocate_dma_buffer(dev, pcm, SNDRV_PCM_STREAM_PLAYBACK, size); if (ret) goto err; } if (pcm->streams[SNDRV_PCM_STREAM_CAPTURE].substream) { - ret = tegra_pcm_preallocate_dma_buffer(pcm, - SNDRV_PCM_STREAM_CAPTURE, size); + ret = tegra_pcm_preallocate_dma_buffer(dev, pcm, SNDRV_PCM_STREAM_CAPTURE, size); if (ret) goto err_free_play; } @@ -284,7 +281,16 @@ static int tegra_pcm_dma_allocate(struct snd_soc_pcm_runtime *rtd, int tegra_pcm_construct(struct snd_soc_component *component, struct snd_soc_pcm_runtime *rtd) { - return tegra_pcm_dma_allocate(rtd, tegra_pcm_hardware.buffer_bytes_max); + struct device *dev = component->dev; + + /* + * Fallback for backwards-compatibility with older device trees that + * have the iommus property in the virtual, top-level "sound" node. + */ + if (!of_get_property(dev->of_node, "iommus", NULL)) + dev = rtd->card->snd_card->dev; + + return tegra_pcm_dma_allocate(dev, rtd, tegra_pcm_hardware.buffer_bytes_max); } EXPORT_SYMBOL_GPL(tegra_pcm_construct); From 2169d6a0f0721935410533281fc7e87e4e2322d1 Mon Sep 17 00:00:00 2001 From: Marek Vasut Date: Thu, 8 Jul 2021 11:12:55 +0200 Subject: [PATCH 204/714] ASoC: tlv320aic32x4: Fix TAS2505 volume controls None of the TAS2505 outputs are stereo, do not pretend they are by implementing them using SOC*DOUBLE* macros referencing the same register twice, use SOC*SINGLE* instead. Fix volume ranges and mute control for the codec according to datasheet. Fixes: b4525b6196cd7 ("ASoC: tlv320aic32x4: add support for TAS2505") Signed-off-by: Marek Vasut Cc: Claudius Heine Cc: Mark Brown Link: https://lore.kernel.org/r/20210708091255.56502-1-marex@denx.de Signed-off-by: Mark Brown --- sound/soc/codecs/tlv320aic32x4.c | 27 +++++++++++++-------------- 1 file changed, 13 insertions(+), 14 deletions(-) diff --git a/sound/soc/codecs/tlv320aic32x4.c b/sound/soc/codecs/tlv320aic32x4.c index c63b717040ed25..dcd8aeb45cb317 100644 --- a/sound/soc/codecs/tlv320aic32x4.c +++ b/sound/soc/codecs/tlv320aic32x4.c @@ -250,8 +250,8 @@ static DECLARE_TLV_DB_SCALE(tlv_pcm, -6350, 50, 0); static DECLARE_TLV_DB_SCALE(tlv_driver_gain, -600, 100, 0); /* -12dB min, 0.5dB steps */ static DECLARE_TLV_DB_SCALE(tlv_adc_vol, -1200, 50, 0); - -static DECLARE_TLV_DB_LINEAR(tlv_spk_vol, TLV_DB_GAIN_MUTE, 0); +/* -6dB min, 1dB steps */ +static DECLARE_TLV_DB_SCALE(tlv_tas_driver_gain, -5850, 50, 0); static DECLARE_TLV_DB_SCALE(tlv_amp_vol, 0, 600, 1); static const char * const lo_cm_text[] = { @@ -1063,21 +1063,20 @@ static const struct snd_soc_component_driver soc_component_dev_aic32x4 = { }; static const struct snd_kcontrol_new aic32x4_tas2505_snd_controls[] = { - SOC_DOUBLE_R_S_TLV("PCM Playback Volume", AIC32X4_LDACVOL, - AIC32X4_LDACVOL, 0, -0x7f, 0x30, 7, 0, tlv_pcm), + SOC_SINGLE_S8_TLV("PCM Playback Volume", + AIC32X4_LDACVOL, -0x7f, 0x30, tlv_pcm), SOC_ENUM("DAC Playback PowerTune Switch", l_ptm_enum), - SOC_DOUBLE_R_S_TLV("HP Driver Playback Volume", AIC32X4_HPLGAIN, - AIC32X4_HPLGAIN, 0, -0x6, 0x1d, 5, 0, - tlv_driver_gain), - SOC_DOUBLE_R("HP DAC Playback Switch", AIC32X4_HPLGAIN, - AIC32X4_HPLGAIN, 6, 0x01, 1), - SOC_SINGLE("Auto-mute Switch", AIC32X4_DACMUTE, 4, 7, 0), + SOC_SINGLE_TLV("HP Driver Gain Volume", + AIC32X4_HPLGAIN, 0, 0x74, 1, tlv_tas_driver_gain), + SOC_SINGLE("HP DAC Playback Switch", AIC32X4_HPLGAIN, 6, 1, 1), - SOC_SINGLE_RANGE_TLV("Speaker Driver Playback Volume", TAS2505_SPKVOL1, - 0, 0, 117, 1, tlv_spk_vol), - SOC_SINGLE_TLV("Speaker Amplifier Playback Volume", TAS2505_SPKVOL2, - 4, 5, 0, tlv_amp_vol), + SOC_SINGLE_TLV("Speaker Driver Playback Volume", + TAS2505_SPKVOL1, 0, 0x74, 1, tlv_tas_driver_gain), + SOC_SINGLE_TLV("Speaker Amplifier Playback Volume", + TAS2505_SPKVOL2, 4, 5, 0, tlv_amp_vol), + + SOC_SINGLE("Auto-mute Switch", AIC32X4_DACMUTE, 4, 7, 0), }; static const struct snd_kcontrol_new hp_output_mixer_controls[] = { From 6c621b811f99feb3941f04b386795b45f47cd771 Mon Sep 17 00:00:00 2001 From: Mark Brown Date: Wed, 7 Jul 2021 17:02:34 +0100 Subject: [PATCH 205/714] ASoC: tlv320aic31xx: Make regmap cache only on probe() Currently the tlv320aic31xx driver has regulator support but does not enable the regulators during probe, deferring this until something causes ASoC to make the card active. It does put the device into cache only mode but only when the component level probe is called, however if interrupts are in use the driver will access the regmap before then which if the regulators are not powered on would cause I/O problems. Signed-off-by: Mark Brown Link: https://lore.kernel.org/r/20210707160234.16253-1-broonie@kernel.org Signed-off-by: Mark Brown --- sound/soc/codecs/tlv320aic31xx.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/sound/soc/codecs/tlv320aic31xx.c b/sound/soc/codecs/tlv320aic31xx.c index 51870d50f4195e..b504d63385b38b 100644 --- a/sound/soc/codecs/tlv320aic31xx.c +++ b/sound/soc/codecs/tlv320aic31xx.c @@ -1604,6 +1604,8 @@ static int aic31xx_i2c_probe(struct i2c_client *i2c, ret); return ret; } + regcache_cache_only(aic31xx->regmap, true); + aic31xx->dev = &i2c->dev; aic31xx->irq = i2c->irq; From 1c73daee4bf30ccdff5e86dc400daa6f74735da5 Mon Sep 17 00:00:00 2001 From: Axel Lin Date: Wed, 30 Jun 2021 17:59:59 +0800 Subject: [PATCH 206/714] regulator: hi6421: Fix getting wrong drvdata Since config.dev = pdev->dev.parent in current code, so dev_get_drvdata(rdev->dev.parent) call in hi6421_regulator_enable returns the drvdata of the mfd device rather than the regulator. Fix it. This was broken while converting to use simplified DT parsing because the config.dev changed from pdev->dev to pdev->dev.parent for parsing the parent's of_node. Fixes: 29dc269a85ef ("regulator: hi6421: Convert to use simplified DT parsing") Signed-off-by: Axel Lin Link: https://lore.kernel.org/r/20210630095959.2411543-1-axel.lin@ingics.com Signed-off-by: Mark Brown --- drivers/regulator/hi6421-regulator.c | 22 +++++++++++++--------- 1 file changed, 13 insertions(+), 9 deletions(-) diff --git a/drivers/regulator/hi6421-regulator.c b/drivers/regulator/hi6421-regulator.c index bff8c515dcde7d..d144a4bdb76dab 100644 --- a/drivers/regulator/hi6421-regulator.c +++ b/drivers/regulator/hi6421-regulator.c @@ -366,9 +366,8 @@ static struct hi6421_regulator_info static int hi6421_regulator_enable(struct regulator_dev *rdev) { - struct hi6421_regulator_pdata *pdata; + struct hi6421_regulator_pdata *pdata = rdev_get_drvdata(rdev); - pdata = dev_get_drvdata(rdev->dev.parent); /* hi6421 spec requires regulator enablement must be serialized: * - Because when BUCK, LDO switching from off to on, it will have * a huge instantaneous current; so you can not turn on two or @@ -385,9 +384,10 @@ static int hi6421_regulator_enable(struct regulator_dev *rdev) static unsigned int hi6421_regulator_ldo_get_mode(struct regulator_dev *rdev) { - struct hi6421_regulator_info *info = rdev_get_drvdata(rdev); + struct hi6421_regulator_info *info; unsigned int reg_val; + info = container_of(rdev->desc, struct hi6421_regulator_info, desc); regmap_read(rdev->regmap, rdev->desc->enable_reg, ®_val); if (reg_val & info->mode_mask) return REGULATOR_MODE_IDLE; @@ -397,9 +397,10 @@ static unsigned int hi6421_regulator_ldo_get_mode(struct regulator_dev *rdev) static unsigned int hi6421_regulator_buck_get_mode(struct regulator_dev *rdev) { - struct hi6421_regulator_info *info = rdev_get_drvdata(rdev); + struct hi6421_regulator_info *info; unsigned int reg_val; + info = container_of(rdev->desc, struct hi6421_regulator_info, desc); regmap_read(rdev->regmap, rdev->desc->enable_reg, ®_val); if (reg_val & info->mode_mask) return REGULATOR_MODE_STANDBY; @@ -410,9 +411,10 @@ static unsigned int hi6421_regulator_buck_get_mode(struct regulator_dev *rdev) static int hi6421_regulator_ldo_set_mode(struct regulator_dev *rdev, unsigned int mode) { - struct hi6421_regulator_info *info = rdev_get_drvdata(rdev); + struct hi6421_regulator_info *info; unsigned int new_mode; + info = container_of(rdev->desc, struct hi6421_regulator_info, desc); switch (mode) { case REGULATOR_MODE_NORMAL: new_mode = 0; @@ -434,9 +436,10 @@ static int hi6421_regulator_ldo_set_mode(struct regulator_dev *rdev, static int hi6421_regulator_buck_set_mode(struct regulator_dev *rdev, unsigned int mode) { - struct hi6421_regulator_info *info = rdev_get_drvdata(rdev); + struct hi6421_regulator_info *info; unsigned int new_mode; + info = container_of(rdev->desc, struct hi6421_regulator_info, desc); switch (mode) { case REGULATOR_MODE_NORMAL: new_mode = 0; @@ -459,7 +462,9 @@ static unsigned int hi6421_regulator_ldo_get_optimum_mode(struct regulator_dev *rdev, int input_uV, int output_uV, int load_uA) { - struct hi6421_regulator_info *info = rdev_get_drvdata(rdev); + struct hi6421_regulator_info *info; + + info = container_of(rdev->desc, struct hi6421_regulator_info, desc); if (load_uA > info->eco_microamp) return REGULATOR_MODE_NORMAL; @@ -543,14 +548,13 @@ static int hi6421_regulator_probe(struct platform_device *pdev) if (!pdata) return -ENOMEM; mutex_init(&pdata->lock); - platform_set_drvdata(pdev, pdata); for (i = 0; i < ARRAY_SIZE(hi6421_regulator_info); i++) { /* assign per-regulator data */ info = &hi6421_regulator_info[i]; config.dev = pdev->dev.parent; - config.driver_data = info; + config.driver_data = pdata; config.regmap = pmic->regmap; rdev = devm_regulator_register(&pdev->dev, &info->desc, From 69e1818ad27bae167eeaaf6829d4a08900ef5153 Mon Sep 17 00:00:00 2001 From: Dan Sneddon Date: Tue, 29 Jun 2021 12:22:18 -0700 Subject: [PATCH 207/714] spi: atmel: Fix CS and initialization bug Commit 5fa5e6dec762 ("spi: atmel: Switch to transfer_one transfer method") switched to using transfer_one and set_cs. The core doesn't call set_cs when the chip select lines are gpios. Add the SPI_MASTER_GPIO_SS flag to the driver to ensure the calls to set_cs happen since the driver programs configuration registers there. Fixes: 5fa5e6dec762 ("spi: atmel: Switch to transfer_one transfer method") Signed-off-by: Dan Sneddon Link: https://lore.kernel.org/r/20210629192218.32125-1-dan.sneddon@microchip.com Signed-off-by: Mark Brown --- drivers/spi/spi-atmel.c | 9 ++------- 1 file changed, 2 insertions(+), 7 deletions(-) diff --git a/drivers/spi/spi-atmel.c b/drivers/spi/spi-atmel.c index 2ef74885ffa2f9..788dcdf25f0030 100644 --- a/drivers/spi/spi-atmel.c +++ b/drivers/spi/spi-atmel.c @@ -352,8 +352,6 @@ static void cs_activate(struct atmel_spi *as, struct spi_device *spi) } mr = spi_readl(as, MR); - if (spi->cs_gpiod) - gpiod_set_value(spi->cs_gpiod, 1); } else { u32 cpol = (spi->mode & SPI_CPOL) ? SPI_BIT(CPOL) : 0; int i; @@ -369,8 +367,6 @@ static void cs_activate(struct atmel_spi *as, struct spi_device *spi) mr = spi_readl(as, MR); mr = SPI_BFINS(PCS, ~(1 << chip_select), mr); - if (spi->cs_gpiod) - gpiod_set_value(spi->cs_gpiod, 1); spi_writel(as, MR, mr); } @@ -400,8 +396,6 @@ static void cs_deactivate(struct atmel_spi *as, struct spi_device *spi) if (!spi->cs_gpiod) spi_writel(as, CR, SPI_BIT(LASTXFER)); - else - gpiod_set_value(spi->cs_gpiod, 0); } static void atmel_spi_lock(struct atmel_spi *as) __acquires(&as->lock) @@ -1483,7 +1477,8 @@ static int atmel_spi_probe(struct platform_device *pdev) master->bus_num = pdev->id; master->num_chipselect = 4; master->setup = atmel_spi_setup; - master->flags = (SPI_MASTER_MUST_RX | SPI_MASTER_MUST_TX); + master->flags = (SPI_MASTER_MUST_RX | SPI_MASTER_MUST_TX | + SPI_MASTER_GPIO_SS); master->transfer_one = atmel_spi_one_transfer; master->set_cs = atmel_spi_set_cs; master->cleanup = atmel_spi_cleanup; From 3a70dd2d050331ee4cf5ad9d5c0a32d83ead9a43 Mon Sep 17 00:00:00 2001 From: Peter Hess Date: Tue, 6 Jul 2021 14:16:09 +0200 Subject: [PATCH 208/714] spi: mediatek: fix fifo rx mode In FIFO mode were two problems: - RX mode was never handled and - in this case the tx_buf pointer was NULL and caused an exception fix this by handling RX mode in mtk_spi_fifo_transfer Fixes: a568231f4632 ("spi: mediatek: Add spi bus for Mediatek MT8173") Signed-off-by: Peter Hess Signed-off-by: Frank Wunderlich Link: https://lore.kernel.org/r/20210706121609.680534-1-linux@fw-web.de Signed-off-by: Mark Brown --- drivers/spi/spi-mt65xx.c | 16 +++++++++++++--- 1 file changed, 13 insertions(+), 3 deletions(-) diff --git a/drivers/spi/spi-mt65xx.c b/drivers/spi/spi-mt65xx.c index 976f73b9e29983..8d5fa7f1e5069a 100644 --- a/drivers/spi/spi-mt65xx.c +++ b/drivers/spi/spi-mt65xx.c @@ -427,13 +427,23 @@ static int mtk_spi_fifo_transfer(struct spi_master *master, mtk_spi_setup_packet(master); cnt = xfer->len / 4; - iowrite32_rep(mdata->base + SPI_TX_DATA_REG, xfer->tx_buf, cnt); + if (xfer->tx_buf) + iowrite32_rep(mdata->base + SPI_TX_DATA_REG, xfer->tx_buf, cnt); + + if (xfer->rx_buf) + ioread32_rep(mdata->base + SPI_RX_DATA_REG, xfer->rx_buf, cnt); remainder = xfer->len % 4; if (remainder > 0) { reg_val = 0; - memcpy(®_val, xfer->tx_buf + (cnt * 4), remainder); - writel(reg_val, mdata->base + SPI_TX_DATA_REG); + if (xfer->tx_buf) { + memcpy(®_val, xfer->tx_buf + (cnt * 4), remainder); + writel(reg_val, mdata->base + SPI_TX_DATA_REG); + } + if (xfer->rx_buf) { + reg_val = readl(mdata->base + SPI_RX_DATA_REG); + memcpy(xfer->rx_buf + (cnt * 4), ®_val, remainder); + } } mtk_spi_enable_transfer(master); From 5937e00017f1d1dd4551e723ebfa306671f27843 Mon Sep 17 00:00:00 2001 From: "Gustavo A. R. Silva" Date: Tue, 15 Jun 2021 09:09:14 -0500 Subject: [PATCH 209/714] xfs: Fix multiple fall-through warnings for Clang In preparation to enable -Wimplicit-fallthrough for Clang, fix the following warnings by replacing /* fallthrough */ comments, and its variants, with the new pseudo-keyword macro fallthrough: fs/xfs/libxfs/xfs_attr.c:487:2: warning: unannotated fall-through between switch labels [-Wimplicit-fallthrough] fs/xfs/libxfs/xfs_attr.c:500:2: warning: unannotated fall-through between switch labels [-Wimplicit-fallthrough] fs/xfs/libxfs/xfs_attr.c:532:2: warning: unannotated fall-through between switch labels [-Wimplicit-fallthrough] fs/xfs/libxfs/xfs_attr.c:594:2: warning: unannotated fall-through between switch labels [-Wimplicit-fallthrough] fs/xfs/libxfs/xfs_attr.c:607:2: warning: unannotated fall-through between switch labels [-Wimplicit-fallthrough] fs/xfs/libxfs/xfs_attr.c:1410:2: warning: unannotated fall-through between switch labels [-Wimplicit-fallthrough] fs/xfs/libxfs/xfs_attr.c:1445:2: warning: unannotated fall-through between switch labels [-Wimplicit-fallthrough] fs/xfs/libxfs/xfs_attr.c:1473:2: warning: unannotated fall-through between switch labels [-Wimplicit-fallthrough] Notice that Clang doesn't recognize /* fallthrough */ comments as implicit fall-through markings, so in order to globally enable -Wimplicit-fallthrough for Clang, these comments need to be replaced with fallthrough; in the whole codebase. Link: https://github.com/KSPP/linux/issues/115 Reviewed-by: Kees Cook Signed-off-by: Gustavo A. R. Silva --- fs/xfs/libxfs/xfs_attr.c | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/fs/xfs/libxfs/xfs_attr.c b/fs/xfs/libxfs/xfs_attr.c index d9d7d5137b73f6..191d517259889f 100644 --- a/fs/xfs/libxfs/xfs_attr.c +++ b/fs/xfs/libxfs/xfs_attr.c @@ -483,7 +483,7 @@ xfs_attr_set_iter( if (error) return error; - /* fallthrough */ + fallthrough; case XFS_DAS_RM_LBLK: /* Set state in case xfs_attr_rmtval_remove returns -EAGAIN */ dac->dela_state = XFS_DAS_RM_LBLK; @@ -496,7 +496,7 @@ xfs_attr_set_iter( return -EAGAIN; } - /* fallthrough */ + fallthrough; case XFS_DAS_RD_LEAF: /* * This is the last step for leaf format. Read the block with @@ -528,7 +528,7 @@ xfs_attr_set_iter( return error; } - /* fallthrough */ + fallthrough; case XFS_DAS_ALLOC_NODE: /* * If there was an out-of-line value, allocate the blocks we @@ -590,7 +590,7 @@ xfs_attr_set_iter( if (error) return error; - /* fallthrough */ + fallthrough; case XFS_DAS_RM_NBLK: /* Set state in case xfs_attr_rmtval_remove returns -EAGAIN */ dac->dela_state = XFS_DAS_RM_NBLK; @@ -603,7 +603,7 @@ xfs_attr_set_iter( return -EAGAIN; } - /* fallthrough */ + fallthrough; case XFS_DAS_CLR_FLAG: /* * The last state for node format. Look up the old attr and @@ -1406,7 +1406,7 @@ xfs_attr_remove_iter( state = dac->da_state; } - /* fallthrough */ + fallthrough; case XFS_DAS_RMTBLK: dac->dela_state = XFS_DAS_RMTBLK; @@ -1441,7 +1441,7 @@ xfs_attr_remove_iter( return -EAGAIN; } - /* fallthrough */ + fallthrough; case XFS_DAS_RM_NAME: /* * If we came here fresh from a transaction roll, reattach all @@ -1469,7 +1469,7 @@ xfs_attr_remove_iter( return -EAGAIN; } - /* fallthrough */ + fallthrough; case XFS_DAS_RM_SHRINK: /* * If the result is small enough, push it all into the inode. From d5c9d0a207f4c61734ccd4b51818788e8b86296a Mon Sep 17 00:00:00 2001 From: "Gustavo A. R. Silva" Date: Mon, 12 Jul 2021 00:44:53 -0500 Subject: [PATCH 210/714] mt76: mt7921: Fix fall-through warning for Clang In preparation to enable -Wimplicit-fallthrough for Clang, fix the following warning by explicitly adding a break statement: drivers/net/wireless/mediatek/mt76/mt7921/main.c:392:2: warning: unannotated fall-through between switch labels [-Wimplicit-fallthrough] Link: https://github.com/KSPP/linux/issues/115 Signed-off-by: Gustavo A. R. Silva --- drivers/net/wireless/mediatek/mt76/mt7921/main.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/net/wireless/mediatek/mt76/mt7921/main.c b/drivers/net/wireless/mediatek/mt76/mt7921/main.c index 7fd21049ff5af7..63ec140c9c372f 100644 --- a/drivers/net/wireless/mediatek/mt76/mt7921/main.c +++ b/drivers/net/wireless/mediatek/mt76/mt7921/main.c @@ -389,6 +389,7 @@ static int mt7921_set_key(struct ieee80211_hw *hw, enum set_key_cmd cmd, case WLAN_CIPHER_SUITE_WEP104: if (!mvif->wep_sta) return -EOPNOTSUPP; + break; case WLAN_CIPHER_SUITE_TKIP: case WLAN_CIPHER_SUITE_CCMP: case WLAN_CIPHER_SUITE_CCMP_256: From 4020f26b368c3e72450afedaefc2fd07ba301d20 Mon Sep 17 00:00:00 2001 From: "Gustavo A. R. Silva" Date: Mon, 12 Jul 2021 00:47:57 -0500 Subject: [PATCH 211/714] nfp: flower-ct: Fix fall-through warning for Clang In preparation to enable -Wimplicit-fallthrough for Clang, fix the following warning by explicitly adding a break statement: drivers/net/ethernet/netronome/nfp/flower/conntrack.c:1175:2: warning: unannotated fall-through between switch labels [-Wimplicit-fallthrough] Link: https://github.com/KSPP/linux/issues/115 Signed-off-by: Gustavo A. R. Silva --- drivers/net/ethernet/netronome/nfp/flower/conntrack.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/net/ethernet/netronome/nfp/flower/conntrack.c b/drivers/net/ethernet/netronome/nfp/flower/conntrack.c index 273d529d43c207..684bb3efe0cf4d 100644 --- a/drivers/net/ethernet/netronome/nfp/flower/conntrack.c +++ b/drivers/net/ethernet/netronome/nfp/flower/conntrack.c @@ -1172,6 +1172,7 @@ int nfp_fl_ct_del_flow(struct nfp_fl_ct_map_entry *ct_map_ent) nfp_ct_map_params); nfp_fl_ct_clean_flow_entry(ct_map_ent->ct_entry); kfree(ct_map_ent); + break; default: break; } From 81eb1d17115fba5ea67a4939a136888a7ec05c32 Mon Sep 17 00:00:00 2001 From: "Gustavo A. R. Silva" Date: Mon, 12 Jul 2021 00:51:03 -0500 Subject: [PATCH 212/714] drm/i915: Fix fall-through warning for Clang In preparation to enable -Wimplicit-fallthrough for Clang, fix a warning by explicitly adding a return; statement: drivers/gpu/drm/i915/gem/i915_gem_shrinker.c:65:2: warning: unannotated fall-through between switch labels [-Wimplicit-fallthrough] Link: https://github.com/KSPP/linux/issues/115 Signed-off-by: Gustavo A. R. Silva --- drivers/gpu/drm/i915/gem/i915_gem_shrinker.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/gpu/drm/i915/gem/i915_gem_shrinker.c b/drivers/gpu/drm/i915/gem/i915_gem_shrinker.c index f4fb68e8955ad1..e382b7f2353b89 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_shrinker.c +++ b/drivers/gpu/drm/i915/gem/i915_gem_shrinker.c @@ -62,6 +62,7 @@ static void try_to_writeback(struct drm_i915_gem_object *obj, switch (obj->mm.madv) { case I915_MADV_DONTNEED: i915_gem_object_truncate(obj); + return; case __I915_MADV_PURGED: return; } From 94b619a07655805a1622484967754f5848640456 Mon Sep 17 00:00:00 2001 From: Marco De Marco Date: Mon, 5 Jul 2021 19:44:21 +0000 Subject: [PATCH 213/714] USB: serial: option: add support for u-blox LARA-R6 family The patch is meant to support LARA-R6 Cat 1 module family. Module USB ID: Vendor ID: 0x05c6 Product ID: 0x90fA Interface layout: If 0: Diagnostic If 1: AT parser If 2: AT parser If 3: QMI wwan (not available in all versions) Signed-off-by: Marco De Marco Link: https://lore.kernel.org/r/49260184.kfMIbaSn9k@mars Cc: stable@vger.kernel.org Signed-off-by: Johan Hovold --- drivers/usb/serial/option.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/usb/serial/option.c b/drivers/usb/serial/option.c index 7608584ef4fe78..0fbe253dc570bc 100644 --- a/drivers/usb/serial/option.c +++ b/drivers/usb/serial/option.c @@ -238,6 +238,7 @@ static void option_instat_callback(struct urb *urb); #define QUECTEL_PRODUCT_UC15 0x9090 /* These u-blox products use Qualcomm's vendor ID */ #define UBLOX_PRODUCT_R410M 0x90b2 +#define UBLOX_PRODUCT_R6XX 0x90fa /* These Yuga products use Qualcomm's vendor ID */ #define YUGA_PRODUCT_CLM920_NC5 0x9625 @@ -1101,6 +1102,8 @@ static const struct usb_device_id option_ids[] = { /* u-blox products using Qualcomm vendor ID */ { USB_DEVICE(QUALCOMM_VENDOR_ID, UBLOX_PRODUCT_R410M), .driver_info = RSVD(1) | RSVD(3) }, + { USB_DEVICE(QUALCOMM_VENDOR_ID, UBLOX_PRODUCT_R6XX), + .driver_info = RSVD(3) }, /* Quectel products using Quectel vendor ID */ { USB_DEVICE_AND_INTERFACE_INFO(QUECTEL_VENDOR_ID, QUECTEL_PRODUCT_EC21, 0xff, 0xff, 0xff), .driver_info = NUMEP2 }, From 2fa9fd69b3ee015a873e44f7c645ad7bcb79d290 Mon Sep 17 00:00:00 2001 From: Biju Das Date: Sat, 26 Jun 2021 09:13:35 +0100 Subject: [PATCH 214/714] clk: renesas: rzg2l: Add multi clock PM support Add multi clock PM support for cpg driver. Signed-off-by: Biju Das Reviewed-by: Lad Prabhakar Link: https://lore.kernel.org/r/20210626081344.5783-2-biju.das.jz@bp.renesas.com Signed-off-by: Geert Uytterhoeven --- drivers/clk/renesas/renesas-rzg2l-cpg.c | 51 ++++++++++++++----------- 1 file changed, 29 insertions(+), 22 deletions(-) diff --git a/drivers/clk/renesas/renesas-rzg2l-cpg.c b/drivers/clk/renesas/renesas-rzg2l-cpg.c index 5009b9e48b13b6..1941f13bd922cf 100644 --- a/drivers/clk/renesas/renesas-rzg2l-cpg.c +++ b/drivers/clk/renesas/renesas-rzg2l-cpg.c @@ -594,42 +594,49 @@ static int rzg2l_cpg_attach_dev(struct generic_pm_domain *unused, struct device { struct device_node *np = dev->of_node; struct of_phandle_args clkspec; + bool once = true; struct clk *clk; int error; int i = 0; while (!of_parse_phandle_with_args(np, "clocks", "#clock-cells", i, &clkspec)) { - if (rzg2l_cpg_is_pm_clk(&clkspec)) - goto found; - - of_node_put(clkspec.np); + if (rzg2l_cpg_is_pm_clk(&clkspec)) { + if (once) { + once = false; + error = pm_clk_create(dev); + if (error) { + of_node_put(clkspec.np); + goto err; + } + } + clk = of_clk_get_from_provider(&clkspec); + of_node_put(clkspec.np); + if (IS_ERR(clk)) { + error = PTR_ERR(clk); + goto fail_destroy; + } + + error = pm_clk_add_clk(dev, clk); + if (error) { + dev_err(dev, "pm_clk_add_clk failed %d\n", + error); + goto fail_put; + } + } else { + of_node_put(clkspec.np); + } i++; } return 0; -found: - clk = of_clk_get_from_provider(&clkspec); - of_node_put(clkspec.np); - - if (IS_ERR(clk)) - return PTR_ERR(clk); - - error = pm_clk_create(dev); - if (error) - goto fail_put; - - error = pm_clk_add_clk(dev, clk); - if (error) - goto fail_destroy; - - return 0; +fail_put: + clk_put(clk); fail_destroy: pm_clk_destroy(dev); -fail_put: - clk_put(clk); +err: return error; } From e93c1373613fb2f3e59db5f13271f155820e6a67 Mon Sep 17 00:00:00 2001 From: Biju Das Date: Sat, 26 Jun 2021 09:13:36 +0100 Subject: [PATCH 215/714] clk: renesas: r9a07g044: Rename divider table As per RZ/G2L HW Manual (Rev.0.50), CPG_PL3A_DDIV,CPG_PL3B_DDIV and CPG_PL2_DDIV(for P0) shares same divider table entries. Rename clk_div_table dtable_3b to clk_div_table dtable_1_32 so that it can be reused. Signed-off-by: Biju Das Reviewed-by: Lad Prabhakar Link: https://lore.kernel.org/r/20210626081344.5783-3-biju.das.jz@bp.renesas.com Signed-off-by: Geert Uytterhoeven --- drivers/clk/renesas/r9a07g044-cpg.c | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/drivers/clk/renesas/r9a07g044-cpg.c b/drivers/clk/renesas/r9a07g044-cpg.c index 50b5269586a4ee..d5803fb1242ee2 100644 --- a/drivers/clk/renesas/r9a07g044-cpg.c +++ b/drivers/clk/renesas/r9a07g044-cpg.c @@ -42,12 +42,13 @@ enum clk_ids { }; /* Divider tables */ -static const struct clk_div_table dtable_3b[] = { +static const struct clk_div_table dtable_1_32[] = { {0, 1}, {1, 2}, {2, 4}, {3, 8}, {4, 32}, + {0, 0}, }; static const struct cpg_core_clk r9a07g044_core_clks[] __initconst = { @@ -72,10 +73,10 @@ static const struct cpg_core_clk r9a07g044_core_clks[] __initconst = { /* Core output clk */ DEF_FIXED("I", R9A07G044_CLK_I, CLK_PLL1, 1, 1), DEF_DIV("P0", R9A07G044_CLK_P0, CLK_PLL2_DIV16, DIVPL2A, - dtable_3b, CLK_DIVIDER_HIWORD_MASK), + dtable_1_32, CLK_DIVIDER_HIWORD_MASK), DEF_FIXED("TSU", R9A07G044_CLK_TSU, CLK_PLL2_DIV20, 1, 1), DEF_DIV("P1", R9A07G044_CLK_P1, CLK_PLL3_DIV8, - DIVPL3B, dtable_3b, CLK_DIVIDER_HIWORD_MASK), + DIVPL3B, dtable_1_32, CLK_DIVIDER_HIWORD_MASK), }; static struct rzg2l_mod_clk r9a07g044_mod_clks[] = { From fd8c3f6c36eb093039d4aeb20cceee00c7c6ba1a Mon Sep 17 00:00:00 2001 From: Biju Das Date: Sat, 26 Jun 2021 09:13:37 +0100 Subject: [PATCH 216/714] clk: renesas: r9a07g044: Fix P1 Clock As per RZ/G2L HW Manual(Rev.0.50) P1 is sourced from pll3_div2_4. So fix the clock definitions for P1. Signed-off-by: Biju Das Reviewed-by: Lad Prabhakar Link: https://lore.kernel.org/r/20210626081344.5783-4-biju.das.jz@bp.renesas.com Signed-off-by: Geert Uytterhoeven --- drivers/clk/renesas/r9a07g044-cpg.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/drivers/clk/renesas/r9a07g044-cpg.c b/drivers/clk/renesas/r9a07g044-cpg.c index d5803fb1242ee2..d895c1cef1fa6a 100644 --- a/drivers/clk/renesas/r9a07g044-cpg.c +++ b/drivers/clk/renesas/r9a07g044-cpg.c @@ -30,8 +30,8 @@ enum clk_ids { CLK_PLL2_DIV20, CLK_PLL3, CLK_PLL3_DIV2, + CLK_PLL3_DIV2_4, CLK_PLL3_DIV4, - CLK_PLL3_DIV8, CLK_PLL4, CLK_PLL5, CLK_PLL5_DIV2, @@ -67,15 +67,15 @@ static const struct cpg_core_clk r9a07g044_core_clks[] __initconst = { DEF_FIXED(".pll2_div20", CLK_PLL2_DIV20, CLK_PLL2, 1, 20), DEF_FIXED(".pll3_div2", CLK_PLL3_DIV2, CLK_PLL3, 1, 2), + DEF_FIXED(".pll3_div2_4", CLK_PLL3_DIV2_4, CLK_PLL3_DIV2, 1, 4), DEF_FIXED(".pll3_div4", CLK_PLL3_DIV4, CLK_PLL3, 1, 4), - DEF_FIXED(".pll3_div8", CLK_PLL3_DIV8, CLK_PLL3, 1, 8), /* Core output clk */ DEF_FIXED("I", R9A07G044_CLK_I, CLK_PLL1, 1, 1), DEF_DIV("P0", R9A07G044_CLK_P0, CLK_PLL2_DIV16, DIVPL2A, dtable_1_32, CLK_DIVIDER_HIWORD_MASK), DEF_FIXED("TSU", R9A07G044_CLK_TSU, CLK_PLL2_DIV20, 1, 1), - DEF_DIV("P1", R9A07G044_CLK_P1, CLK_PLL3_DIV8, + DEF_DIV("P1", R9A07G044_CLK_P1, CLK_PLL3_DIV2_4, DIVPL3B, dtable_1_32, CLK_DIVIDER_HIWORD_MASK), }; From 668756f7299d2d3c75add17cb415717e247450ef Mon Sep 17 00:00:00 2001 From: Biju Das Date: Sat, 26 Jun 2021 09:13:38 +0100 Subject: [PATCH 217/714] clk: renesas: r9a07g044: Add P2 Clock support Add support for P2 clock which is sourced from pll3_div2_4_2. Signed-off-by: Biju Das Reviewed-by: Lad Prabhakar Link: https://lore.kernel.org/r/20210626081344.5783-5-biju.das.jz@bp.renesas.com Signed-off-by: Geert Uytterhoeven --- drivers/clk/renesas/r9a07g044-cpg.c | 4 ++++ drivers/clk/renesas/renesas-rzg2l-cpg.h | 1 + 2 files changed, 5 insertions(+) diff --git a/drivers/clk/renesas/r9a07g044-cpg.c b/drivers/clk/renesas/r9a07g044-cpg.c index d895c1cef1fa6a..70df4feda417de 100644 --- a/drivers/clk/renesas/r9a07g044-cpg.c +++ b/drivers/clk/renesas/r9a07g044-cpg.c @@ -31,6 +31,7 @@ enum clk_ids { CLK_PLL3, CLK_PLL3_DIV2, CLK_PLL3_DIV2_4, + CLK_PLL3_DIV2_4_2, CLK_PLL3_DIV4, CLK_PLL4, CLK_PLL5, @@ -68,6 +69,7 @@ static const struct cpg_core_clk r9a07g044_core_clks[] __initconst = { DEF_FIXED(".pll3_div2", CLK_PLL3_DIV2, CLK_PLL3, 1, 2), DEF_FIXED(".pll3_div2_4", CLK_PLL3_DIV2_4, CLK_PLL3_DIV2, 1, 4), + DEF_FIXED(".pll3_div2_4_2", CLK_PLL3_DIV2_4_2, CLK_PLL3_DIV2_4, 1, 2), DEF_FIXED(".pll3_div4", CLK_PLL3_DIV4, CLK_PLL3, 1, 4), /* Core output clk */ @@ -77,6 +79,8 @@ static const struct cpg_core_clk r9a07g044_core_clks[] __initconst = { DEF_FIXED("TSU", R9A07G044_CLK_TSU, CLK_PLL2_DIV20, 1, 1), DEF_DIV("P1", R9A07G044_CLK_P1, CLK_PLL3_DIV2_4, DIVPL3B, dtable_1_32, CLK_DIVIDER_HIWORD_MASK), + DEF_DIV("P2", R9A07G044_CLK_P2, CLK_PLL3_DIV2_4_2, + DIVPL3A, dtable_1_32, CLK_DIVIDER_HIWORD_MASK), }; static struct rzg2l_mod_clk r9a07g044_mod_clks[] = { diff --git a/drivers/clk/renesas/renesas-rzg2l-cpg.h b/drivers/clk/renesas/renesas-rzg2l-cpg.h index 3948bdd8afc90c..a6a3bade1985eb 100644 --- a/drivers/clk/renesas/renesas-rzg2l-cpg.h +++ b/drivers/clk/renesas/renesas-rzg2l-cpg.h @@ -21,6 +21,7 @@ #define DDIV_PACK(offset, bitpos, size) \ (((offset) << 20) | ((bitpos) << 12) | ((size) << 8)) #define DIVPL2A DDIV_PACK(CPG_PL2_DDIV, 0, 3) +#define DIVPL3A DDIV_PACK(CPG_PL3A_DDIV, 0, 3) #define DIVPL3B DDIV_PACK(CPG_PL3A_DDIV, 4, 3) /** From c3e67ad6f5a2c698a055fb297c6f9962f5145edd Mon Sep 17 00:00:00 2001 From: Biju Das Date: Sat, 26 Jun 2021 09:13:39 +0100 Subject: [PATCH 218/714] dt-bindings: clock: r9a07g044-cpg: Update clock/reset definitions Update clock and reset definitions as per RZ/G2L_clock_list_r02_02.xlsx and RZ/G2L HW(Rev.0.50) manual. Update {GIC,IA55,SCIF} clock and reset entries in the CPG driver, and separate reset from module clocks in order to handle them efficiently. Update the SCIF0 clock and reset index in the SoC DTSI. Signed-off-by: Biju Das Reviewed-by: Lad Prabhakar Link: https://lore.kernel.org/r/20210626081344.5783-6-biju.das.jz@bp.renesas.com Link: https://lore.kernel.org/r/20210626081344.5783-7-biju.das.jz@bp.renesas.com Link: https://lore.kernel.org/r/20210626081344.5783-8-biju.das.jz@bp.renesas.com [geert: Squashed 3 commits] Signed-off-by: Geert Uytterhoeven --- arch/arm64/boot/dts/renesas/r9a07g044.dtsi | 4 +- drivers/clk/renesas/r9a07g044-cpg.c | 62 +++--- drivers/clk/renesas/renesas-rzg2l-cpg.c | 59 +++--- drivers/clk/renesas/renesas-rzg2l-cpg.h | 36 +++- include/dt-bindings/clock/r9a07g044-cpg.h | 236 ++++++++++++++++----- 5 files changed, 278 insertions(+), 119 deletions(-) diff --git a/arch/arm64/boot/dts/renesas/r9a07g044.dtsi b/arch/arm64/boot/dts/renesas/r9a07g044.dtsi index 734c8adeceba07..01482d2275069c 100644 --- a/arch/arm64/boot/dts/renesas/r9a07g044.dtsi +++ b/arch/arm64/boot/dts/renesas/r9a07g044.dtsi @@ -82,10 +82,10 @@ ; interrupt-names = "eri", "rxi", "txi", "bri", "dri", "tei"; - clocks = <&cpg CPG_MOD R9A07G044_CLK_SCIF0>; + clocks = <&cpg CPG_MOD R9A07G044_SCIF0_CLK_PCK>; clock-names = "fck"; power-domains = <&cpg>; - resets = <&cpg R9A07G044_CLK_SCIF0>; + resets = <&cpg R9A07G044_SCIF0_RST_SYSTEM_N>; status = "disabled"; }; diff --git a/drivers/clk/renesas/r9a07g044-cpg.c b/drivers/clk/renesas/r9a07g044-cpg.c index 70df4feda417de..ae24e0397d3cda 100644 --- a/drivers/clk/renesas/r9a07g044-cpg.c +++ b/drivers/clk/renesas/r9a07g044-cpg.c @@ -84,34 +84,40 @@ static const struct cpg_core_clk r9a07g044_core_clks[] __initconst = { }; static struct rzg2l_mod_clk r9a07g044_mod_clks[] = { - DEF_MOD("gic", R9A07G044_CLK_GIC600, - R9A07G044_CLK_P1, - 0x514, BIT(0), (BIT(0) | BIT(1))), - DEF_MOD("ia55", R9A07G044_CLK_IA55, - R9A07G044_CLK_P1, - 0x518, (BIT(0) | BIT(1)), BIT(0)), - DEF_MOD("scif0", R9A07G044_CLK_SCIF0, - R9A07G044_CLK_P0, - 0x584, BIT(0), BIT(0)), - DEF_MOD("scif1", R9A07G044_CLK_SCIF1, - R9A07G044_CLK_P0, - 0x584, BIT(1), BIT(1)), - DEF_MOD("scif2", R9A07G044_CLK_SCIF2, - R9A07G044_CLK_P0, - 0x584, BIT(2), BIT(2)), - DEF_MOD("scif3", R9A07G044_CLK_SCIF3, - R9A07G044_CLK_P0, - 0x584, BIT(3), BIT(3)), - DEF_MOD("scif4", R9A07G044_CLK_SCIF4, - R9A07G044_CLK_P0, - 0x584, BIT(4), BIT(4)), - DEF_MOD("sci0", R9A07G044_CLK_SCI0, - R9A07G044_CLK_P0, - 0x588, BIT(0), BIT(0)), + DEF_MOD("gic", R9A07G044_GIC600_GICCLK, R9A07G044_CLK_P1, + 0x514, 0), + DEF_MOD("ia55_pclk", R9A07G044_IA55_PCLK, R9A07G044_CLK_P2, + 0x518, 0), + DEF_MOD("ia55_clk", R9A07G044_IA55_CLK, R9A07G044_CLK_P1, + 0x518, 1), + DEF_MOD("scif0", R9A07G044_SCIF0_CLK_PCK, R9A07G044_CLK_P0, + 0x584, 0), + DEF_MOD("scif1", R9A07G044_SCIF1_CLK_PCK, R9A07G044_CLK_P0, + 0x584, 1), + DEF_MOD("scif2", R9A07G044_SCIF2_CLK_PCK, R9A07G044_CLK_P0, + 0x584, 2), + DEF_MOD("scif3", R9A07G044_SCIF3_CLK_PCK, R9A07G044_CLK_P0, + 0x584, 3), + DEF_MOD("scif4", R9A07G044_SCIF4_CLK_PCK, R9A07G044_CLK_P0, + 0x584, 4), + DEF_MOD("sci0", R9A07G044_SCI0_CLKP, R9A07G044_CLK_P0, + 0x588, 0), +}; + +static struct rzg2l_reset r9a07g044_resets[] = { + DEF_RST(R9A07G044_GIC600_GICRESET_N, 0x814, 0), + DEF_RST(R9A07G044_GIC600_DBG_GICRESET_N, 0x814, 1), + DEF_RST(R9A07G044_IA55_RESETN, 0x818, 0), + DEF_RST(R9A07G044_SCIF0_RST_SYSTEM_N, 0x884, 0), + DEF_RST(R9A07G044_SCIF1_RST_SYSTEM_N, 0x884, 1), + DEF_RST(R9A07G044_SCIF2_RST_SYSTEM_N, 0x884, 2), + DEF_RST(R9A07G044_SCIF3_RST_SYSTEM_N, 0x884, 3), + DEF_RST(R9A07G044_SCIF4_RST_SYSTEM_N, 0x884, 4), + DEF_RST(R9A07G044_SCI0_RST, 0x888, 0), }; static const unsigned int r9a07g044_crit_mod_clks[] __initconst = { - MOD_CLK_BASE + R9A07G044_CLK_GIC600, + MOD_CLK_BASE + R9A07G044_GIC600_GICCLK, }; const struct rzg2l_cpg_info r9a07g044_cpg_info = { @@ -128,5 +134,9 @@ const struct rzg2l_cpg_info r9a07g044_cpg_info = { /* Module Clocks */ .mod_clks = r9a07g044_mod_clks, .num_mod_clks = ARRAY_SIZE(r9a07g044_mod_clks), - .num_hw_mod_clks = R9A07G044_CLK_MIPI_DSI_PIN + 1, + .num_hw_mod_clks = R9A07G044_TSU_PCLK + 1, + + /* Resets */ + .resets = r9a07g044_resets, + .num_resets = ARRAY_SIZE(r9a07g044_resets), }; diff --git a/drivers/clk/renesas/renesas-rzg2l-cpg.c b/drivers/clk/renesas/renesas-rzg2l-cpg.c index 1941f13bd922cf..e7c59af2a1d85f 100644 --- a/drivers/clk/renesas/renesas-rzg2l-cpg.c +++ b/drivers/clk/renesas/renesas-rzg2l-cpg.c @@ -47,9 +47,9 @@ #define SDIV(val) DIV_RSMASK(val, 0, 0x7) #define CLK_ON_R(reg) (reg) -#define CLK_MON_R(reg) (0x680 - 0x500 + (reg)) -#define CLK_RST_R(reg) (0x800 - 0x500 + (reg)) -#define CLK_MRST_R(reg) (0x980 - 0x500 + (reg)) +#define CLK_MON_R(reg) (0x180 + (reg)) +#define CLK_RST_R(reg) (reg) +#define CLK_MRST_R(reg) (0x180 + (reg)) #define GET_REG_OFFSET(val) ((val >> 20) & 0xfff) #define GET_REG_SAMPLL_CLK1(val) ((val >> 22) & 0xfff) @@ -78,6 +78,7 @@ struct rzg2l_cpg_priv { struct clk **clks; unsigned int num_core_clks; unsigned int num_mod_clks; + unsigned int num_resets; unsigned int last_dt_core_clk; struct raw_notifier_head notifiers; @@ -315,15 +316,13 @@ rzg2l_cpg_register_core_clk(const struct cpg_core_clk *core, * * @hw: handle between common and hardware-specific interfaces * @off: register offset - * @onoff: ON/MON bits - * @reset: reset bits + * @bit: ON/MON bit * @priv: CPG/MSTP private data */ struct mstp_clock { struct clk_hw hw; u16 off; - u8 onoff; - u8 reset; + u8 bit; struct rzg2l_cpg_priv *priv; }; @@ -337,6 +336,7 @@ static int rzg2l_mod_clock_endisable(struct clk_hw *hw, bool enable) struct device *dev = priv->dev; unsigned long flags; unsigned int i; + u32 bitmask = BIT(clock->bit); u32 value; if (!clock->off) { @@ -349,9 +349,9 @@ static int rzg2l_mod_clock_endisable(struct clk_hw *hw, bool enable) spin_lock_irqsave(&priv->rmw_lock, flags); if (enable) - value = (clock->onoff << 16) | clock->onoff; + value = (bitmask << 16) | bitmask; else - value = clock->onoff << 16; + value = bitmask << 16; writel(value, priv->base + CLK_ON_R(reg)); spin_unlock_irqrestore(&priv->rmw_lock, flags); @@ -360,7 +360,7 @@ static int rzg2l_mod_clock_endisable(struct clk_hw *hw, bool enable) return 0; for (i = 1000; i > 0; --i) { - if (((readl(priv->base + CLK_MON_R(reg))) & clock->onoff)) + if (((readl(priv->base + CLK_MON_R(reg))) & bitmask)) break; cpu_relax(); } @@ -388,6 +388,7 @@ static int rzg2l_mod_clock_is_enabled(struct clk_hw *hw) { struct mstp_clock *clock = to_mod_clock(hw); struct rzg2l_cpg_priv *priv = clock->priv; + u32 bitmask = BIT(clock->bit); u32 value; if (!clock->off) { @@ -397,7 +398,7 @@ static int rzg2l_mod_clock_is_enabled(struct clk_hw *hw) value = readl(priv->base + CLK_MON_R(clock->off)); - return !(value & clock->onoff); + return !(value & bitmask); } static const struct clk_ops rzg2l_mod_clock_ops = { @@ -457,8 +458,7 @@ rzg2l_cpg_register_mod_clk(const struct rzg2l_mod_clk *mod, init.num_parents = 1; clock->off = mod->off; - clock->onoff = mod->onoff; - clock->reset = mod->reset; + clock->bit = mod->bit; clock->priv = priv; clock->hw.init = &init; @@ -483,12 +483,11 @@ static int rzg2l_cpg_reset(struct reset_controller_dev *rcdev, { struct rzg2l_cpg_priv *priv = rcdev_to_priv(rcdev); const struct rzg2l_cpg_info *info = priv->info; - unsigned int reg = info->mod_clks[id].off; - u32 dis = info->mod_clks[id].reset; + unsigned int reg = info->resets[id].off; + u32 dis = BIT(info->resets[id].bit); u32 we = dis << 16; - dev_dbg(rcdev->dev, "reset name:%s id:%ld offset:0x%x\n", - info->mod_clks[id].name, id, CLK_RST_R(reg)); + dev_dbg(rcdev->dev, "reset id:%ld offset:0x%x\n", id, CLK_RST_R(reg)); /* Reset module */ writel(we, priv->base + CLK_RST_R(reg)); @@ -507,11 +506,10 @@ static int rzg2l_cpg_assert(struct reset_controller_dev *rcdev, { struct rzg2l_cpg_priv *priv = rcdev_to_priv(rcdev); const struct rzg2l_cpg_info *info = priv->info; - unsigned int reg = info->mod_clks[id].off; - u32 value = info->mod_clks[id].reset << 16; + unsigned int reg = info->resets[id].off; + u32 value = BIT(info->resets[id].bit) << 16; - dev_dbg(rcdev->dev, "assert name:%s id:%ld offset:0x%x\n", - info->mod_clks[id].name, id, CLK_RST_R(reg)); + dev_dbg(rcdev->dev, "assert id:%ld offset:0x%x\n", id, CLK_RST_R(reg)); writel(value, priv->base + CLK_RST_R(reg)); return 0; @@ -522,12 +520,12 @@ static int rzg2l_cpg_deassert(struct reset_controller_dev *rcdev, { struct rzg2l_cpg_priv *priv = rcdev_to_priv(rcdev); const struct rzg2l_cpg_info *info = priv->info; - unsigned int reg = info->mod_clks[id].off; - u32 dis = info->mod_clks[id].reset; + unsigned int reg = info->resets[id].off; + u32 dis = BIT(info->resets[id].bit); u32 value = (dis << 16) | dis; - dev_dbg(rcdev->dev, "deassert name:%s id:%ld offset:0x%x\n", - info->mod_clks[id].name, id, CLK_RST_R(reg)); + dev_dbg(rcdev->dev, "deassert id:%ld offset:0x%x\n", id, + CLK_RST_R(reg)); writel(value, priv->base + CLK_RST_R(reg)); return 0; @@ -538,8 +536,8 @@ static int rzg2l_cpg_status(struct reset_controller_dev *rcdev, { struct rzg2l_cpg_priv *priv = rcdev_to_priv(rcdev); const struct rzg2l_cpg_info *info = priv->info; - unsigned int reg = info->mod_clks[id].off; - u32 bitmask = info->mod_clks[id].reset; + unsigned int reg = info->resets[id].off; + u32 bitmask = BIT(info->resets[id].bit); return !(readl(priv->base + CLK_MRST_R(reg)) & bitmask); } @@ -554,9 +552,11 @@ static const struct reset_control_ops rzg2l_cpg_reset_ops = { static int rzg2l_cpg_reset_xlate(struct reset_controller_dev *rcdev, const struct of_phandle_args *reset_spec) { + struct rzg2l_cpg_priv *priv = rcdev_to_priv(rcdev); + const struct rzg2l_cpg_info *info = priv->info; unsigned int id = reset_spec->args[0]; - if (id >= rcdev->nr_resets) { + if (id >= rcdev->nr_resets || !info->resets[id].off) { dev_err(rcdev->dev, "Invalid reset index %u\n", id); return -EINVAL; } @@ -571,7 +571,7 @@ static int rzg2l_cpg_reset_controller_register(struct rzg2l_cpg_priv *priv) priv->rcdev.dev = priv->dev; priv->rcdev.of_reset_n_cells = 1; priv->rcdev.of_xlate = rzg2l_cpg_reset_xlate; - priv->rcdev.nr_resets = priv->num_mod_clks; + priv->rcdev.nr_resets = priv->num_resets; return devm_reset_controller_register(priv->dev, &priv->rcdev); } @@ -699,6 +699,7 @@ static int __init rzg2l_cpg_probe(struct platform_device *pdev) priv->clks = clks; priv->num_core_clks = info->num_total_core_clks; priv->num_mod_clks = info->num_hw_mod_clks; + priv->num_resets = info->num_resets; priv->last_dt_core_clk = info->last_dt_core_clk; for (i = 0; i < nclks; i++) diff --git a/drivers/clk/renesas/renesas-rzg2l-cpg.h b/drivers/clk/renesas/renesas-rzg2l-cpg.h index a6a3bade1985eb..63695280ce8b27 100644 --- a/drivers/clk/renesas/renesas-rzg2l-cpg.h +++ b/drivers/clk/renesas/renesas-rzg2l-cpg.h @@ -77,26 +77,40 @@ enum clk_types { * @id: clock index in array containing all Core and Module Clocks * @parent: id of parent clock * @off: register offset - * @onoff: ON/MON bits - * @reset: reset bits + * @bit: ON/MON bit */ struct rzg2l_mod_clk { const char *name; unsigned int id; unsigned int parent; u16 off; - u8 onoff; - u8 reset; + u8 bit; }; -#define DEF_MOD(_name, _id, _parent, _off, _onoff, _reset) \ - [_id] = { \ +#define DEF_MOD(_name, _id, _parent, _off, _bit) \ + { \ .name = _name, \ - .id = MOD_CLK_BASE + _id, \ + .id = MOD_CLK_BASE + (_id), \ .parent = (_parent), \ .off = (_off), \ - .onoff = (_onoff), \ - .reset = (_reset) \ + .bit = (_bit), \ + } + +/** + * struct rzg2l_reset - Reset definitions + * + * @off: register offset + * @bit: reset bit + */ +struct rzg2l_reset { + u16 off; + u8 bit; +}; + +#define DEF_RST(_id, _off, _bit) \ + [_id] = { \ + .off = (_off), \ + .bit = (_bit) \ } /** @@ -127,6 +141,10 @@ struct rzg2l_cpg_info { unsigned int num_mod_clks; unsigned int num_hw_mod_clks; + /* Resets */ + const struct rzg2l_reset *resets; + unsigned int num_resets; + /* Critical Module Clocks that should not be disabled */ const unsigned int *crit_mod_clks; unsigned int num_crit_mod_clks; diff --git a/include/dt-bindings/clock/r9a07g044-cpg.h b/include/dt-bindings/clock/r9a07g044-cpg.h index 1d8986563fc541..0728ad07ff7a28 100644 --- a/include/dt-bindings/clock/r9a07g044-cpg.h +++ b/include/dt-bindings/clock/r9a07g044-cpg.h @@ -32,58 +32,188 @@ #define R9A07G044_OSCCLK 21 /* R9A07G044 Module Clocks */ -#define R9A07G044_CLK_GIC600 0 -#define R9A07G044_CLK_IA55 1 -#define R9A07G044_CLK_SYC 2 -#define R9A07G044_CLK_DMAC 3 -#define R9A07G044_CLK_SYSC 4 -#define R9A07G044_CLK_MTU 5 -#define R9A07G044_CLK_GPT 6 -#define R9A07G044_CLK_ETH0 7 -#define R9A07G044_CLK_ETH1 8 -#define R9A07G044_CLK_I2C0 9 -#define R9A07G044_CLK_I2C1 10 -#define R9A07G044_CLK_I2C2 11 -#define R9A07G044_CLK_I2C3 12 -#define R9A07G044_CLK_SCIF0 13 -#define R9A07G044_CLK_SCIF1 14 -#define R9A07G044_CLK_SCIF2 15 -#define R9A07G044_CLK_SCIF3 16 -#define R9A07G044_CLK_SCIF4 17 -#define R9A07G044_CLK_SCI0 18 -#define R9A07G044_CLK_SCI1 19 -#define R9A07G044_CLK_GPIO 20 -#define R9A07G044_CLK_SDHI0 21 -#define R9A07G044_CLK_SDHI1 22 -#define R9A07G044_CLK_USB0 23 -#define R9A07G044_CLK_USB1 24 -#define R9A07G044_CLK_CANFD 25 -#define R9A07G044_CLK_SSI0 26 -#define R9A07G044_CLK_SSI1 27 -#define R9A07G044_CLK_SSI2 28 -#define R9A07G044_CLK_SSI3 29 -#define R9A07G044_CLK_MHU 30 -#define R9A07G044_CLK_OSTM0 31 -#define R9A07G044_CLK_OSTM1 32 -#define R9A07G044_CLK_OSTM2 33 -#define R9A07G044_CLK_WDT0 34 -#define R9A07G044_CLK_WDT1 35 -#define R9A07G044_CLK_WDT2 36 -#define R9A07G044_CLK_WDT_PON 37 -#define R9A07G044_CLK_GPU 38 -#define R9A07G044_CLK_ISU 39 -#define R9A07G044_CLK_H264 40 -#define R9A07G044_CLK_CRU 41 -#define R9A07G044_CLK_MIPI_DSI 42 -#define R9A07G044_CLK_LCDC 43 -#define R9A07G044_CLK_SRC 44 -#define R9A07G044_CLK_RSPI0 45 -#define R9A07G044_CLK_RSPI1 46 -#define R9A07G044_CLK_RSPI2 47 -#define R9A07G044_CLK_ADC 48 -#define R9A07G044_CLK_TSU_PCLK 49 -#define R9A07G044_CLK_SPI 50 -#define R9A07G044_CLK_MIPI_DSI_V 51 -#define R9A07G044_CLK_MIPI_DSI_PIN 52 +#define R9A07G044_CA55_SCLK 0 +#define R9A07G044_CA55_PCLK 1 +#define R9A07G044_CA55_ATCLK 2 +#define R9A07G044_CA55_GICCLK 3 +#define R9A07G044_CA55_PERICLK 4 +#define R9A07G044_CA55_ACLK 5 +#define R9A07G044_CA55_TSCLK 6 +#define R9A07G044_GIC600_GICCLK 7 +#define R9A07G044_IA55_CLK 8 +#define R9A07G044_IA55_PCLK 9 +#define R9A07G044_MHU_PCLK 10 +#define R9A07G044_SYC_CNT_CLK 11 +#define R9A07G044_DMAC_ACLK 12 +#define R9A07G044_DMAC_PCLK 13 +#define R9A07G044_OSTM0_PCLK 14 +#define R9A07G044_OSTM1_PCLK 15 +#define R9A07G044_OSTM2_PCLK 16 +#define R9A07G044_MTU_X_MCK_MTU3 17 +#define R9A07G044_POE3_CLKM_POE 18 +#define R9A07G044_GPT_PCLK 19 +#define R9A07G044_POEG_A_CLKP 20 +#define R9A07G044_POEG_B_CLKP 21 +#define R9A07G044_POEG_C_CLKP 22 +#define R9A07G044_POEG_D_CLKP 23 +#define R9A07G044_WDT0_PCLK 24 +#define R9A07G044_WDT0_CLK 25 +#define R9A07G044_WDT1_PCLK 26 +#define R9A07G044_WDT1_CLK 27 +#define R9A07G044_WDT2_PCLK 28 +#define R9A07G044_WDT2_CLK 29 +#define R9A07G044_SPI_CLK2 30 +#define R9A07G044_SPI_CLK 31 +#define R9A07G044_SDHI0_IMCLK 32 +#define R9A07G044_SDHI0_IMCLK2 33 +#define R9A07G044_SDHI0_CLK_HS 34 +#define R9A07G044_SDHI0_ACLK 35 +#define R9A07G044_SDHI1_IMCLK 36 +#define R9A07G044_SDHI1_IMCLK2 37 +#define R9A07G044_SDHI1_CLK_HS 38 +#define R9A07G044_SDHI1_ACLK 39 +#define R9A07G044_GPU_CLK 40 +#define R9A07G044_GPU_AXI_CLK 41 +#define R9A07G044_GPU_ACE_CLK 42 +#define R9A07G044_ISU_ACLK 43 +#define R9A07G044_ISU_PCLK 44 +#define R9A07G044_H264_CLK_A 45 +#define R9A07G044_H264_CLK_P 46 +#define R9A07G044_CRU_SYSCLK 47 +#define R9A07G044_CRU_VCLK 48 +#define R9A07G044_CRU_PCLK 49 +#define R9A07G044_CRU_ACLK 50 +#define R9A07G044_MIPI_DSI_PLLCLK 51 +#define R9A07G044_MIPI_DSI_SYSCLK 52 +#define R9A07G044_MIPI_DSI_ACLK 53 +#define R9A07G044_MIPI_DSI_PCLK 54 +#define R9A07G044_MIPI_DSI_VCLK 55 +#define R9A07G044_MIPI_DSI_LPCLK 56 +#define R9A07G044_LCDC_CLK_A 57 +#define R9A07G044_LCDC_CLK_P 58 +#define R9A07G044_LCDC_CLK_D 59 +#define R9A07G044_SSI0_PCLK2 60 +#define R9A07G044_SSI0_PCLK_SFR 61 +#define R9A07G044_SSI1_PCLK2 62 +#define R9A07G044_SSI1_PCLK_SFR 63 +#define R9A07G044_SSI2_PCLK2 64 +#define R9A07G044_SSI2_PCLK_SFR 65 +#define R9A07G044_SSI3_PCLK2 66 +#define R9A07G044_SSI3_PCLK_SFR 67 +#define R9A07G044_SRC_CLKP 68 +#define R9A07G044_USB_U2H0_HCLK 69 +#define R9A07G044_USB_U2H1_HCLK 70 +#define R9A07G044_USB_U2P_EXR_CPUCLK 71 +#define R9A07G044_USB_PCLK 72 +#define R9A07G044_ETH0_CLK_AXI 73 +#define R9A07G044_ETH0_CLK_CHI 74 +#define R9A07G044_ETH1_CLK_AXI 75 +#define R9A07G044_ETH1_CLK_CHI 76 +#define R9A07G044_I2C0_PCLK 77 +#define R9A07G044_I2C1_PCLK 78 +#define R9A07G044_I2C2_PCLK 79 +#define R9A07G044_I2C3_PCLK 80 +#define R9A07G044_SCIF0_CLK_PCK 81 +#define R9A07G044_SCIF1_CLK_PCK 82 +#define R9A07G044_SCIF2_CLK_PCK 83 +#define R9A07G044_SCIF3_CLK_PCK 84 +#define R9A07G044_SCIF4_CLK_PCK 85 +#define R9A07G044_SCI0_CLKP 86 +#define R9A07G044_SCI1_CLKP 87 +#define R9A07G044_IRDA_CLKP 88 +#define R9A07G044_RSPI0_CLKB 89 +#define R9A07G044_RSPI1_CLKB 90 +#define R9A07G044_RSPI2_CLKB 91 +#define R9A07G044_CANFD_PCLK 92 +#define R9A07G044_GPIO_HCLK 93 +#define R9A07G044_ADC_ADCLK 94 +#define R9A07G044_ADC_PCLK 95 +#define R9A07G044_TSU_PCLK 96 + +/* R9A07G044 Resets */ +#define R9A07G044_CA55_RST_1_0 0 +#define R9A07G044_CA55_RST_1_1 1 +#define R9A07G044_CA55_RST_3_0 2 +#define R9A07G044_CA55_RST_3_1 3 +#define R9A07G044_CA55_RST_4 4 +#define R9A07G044_CA55_RST_5 5 +#define R9A07G044_CA55_RST_6 6 +#define R9A07G044_CA55_RST_7 7 +#define R9A07G044_CA55_RST_8 8 +#define R9A07G044_CA55_RST_9 9 +#define R9A07G044_CA55_RST_10 10 +#define R9A07G044_CA55_RST_11 11 +#define R9A07G044_CA55_RST_12 12 +#define R9A07G044_GIC600_GICRESET_N 13 +#define R9A07G044_GIC600_DBG_GICRESET_N 14 +#define R9A07G044_IA55_RESETN 15 +#define R9A07G044_MHU_RESETN 16 +#define R9A07G044_DMAC_ARESETN 17 +#define R9A07G044_DMAC_RST_ASYNC 18 +#define R9A07G044_SYC_RESETN 19 +#define R9A07G044_OSTM0_PRESETZ 20 +#define R9A07G044_OSTM1_PRESETZ 21 +#define R9A07G044_OSTM2_PRESETZ 22 +#define R9A07G044_MTU_X_PRESET_MTU3 23 +#define R9A07G044_POE3_RST_M_REG 24 +#define R9A07G044_GPT_RST_C 25 +#define R9A07G044_POEG_A_RST 26 +#define R9A07G044_POEG_B_RST 27 +#define R9A07G044_POEG_C_RST 28 +#define R9A07G044_POEG_D_RST 29 +#define R9A07G044_WDT0_PRESETN 30 +#define R9A07G044_WDT1_PRESETN 31 +#define R9A07G044_WDT2_PRESETN 32 +#define R9A07G044_SPI_RST 33 +#define R9A07G044_SDHI0_IXRST 34 +#define R9A07G044_SDHI1_IXRST 35 +#define R9A07G044_GPU_RESETN 36 +#define R9A07G044_GPU_AXI_RESETN 37 +#define R9A07G044_GPU_ACE_RESETN 38 +#define R9A07G044_ISU_ARESETN 39 +#define R9A07G044_ISU_PRESETN 40 +#define R9A07G044_H264_X_RESET_VCP 41 +#define R9A07G044_H264_CP_PRESET_P 42 +#define R9A07G044_CRU_CMN_RSTB 43 +#define R9A07G044_CRU_PRESETN 44 +#define R9A07G044_CRU_ARESETN 45 +#define R9A07G044_MIPI_DSI_CMN_RSTB 46 +#define R9A07G044_MIPI_DSI_ARESET_N 47 +#define R9A07G044_MIPI_DSI_PRESET_N 48 +#define R9A07G044_LCDC_RESET_N 49 +#define R9A07G044_SSI0_RST_M2_REG 50 +#define R9A07G044_SSI1_RST_M2_REG 51 +#define R9A07G044_SSI2_RST_M2_REG 52 +#define R9A07G044_SSI3_RST_M2_REG 53 +#define R9A07G044_SRC_RST 54 +#define R9A07G044_USB_U2H0_HRESETN 55 +#define R9A07G044_USB_U2H1_HRESETN 56 +#define R9A07G044_USB_U2P_EXL_SYSRST 57 +#define R9A07G044_USB_PRESETN 58 +#define R9A07G044_ETH0_RST_HW_N 59 +#define R9A07G044_ETH1_RST_HW_N 60 +#define R9A07G044_I2C0_MRST 61 +#define R9A07G044_I2C1_MRST 62 +#define R9A07G044_I2C2_MRST 63 +#define R9A07G044_I2C3_MRST 64 +#define R9A07G044_SCIF0_RST_SYSTEM_N 65 +#define R9A07G044_SCIF1_RST_SYSTEM_N 66 +#define R9A07G044_SCIF2_RST_SYSTEM_N 67 +#define R9A07G044_SCIF3_RST_SYSTEM_N 68 +#define R9A07G044_SCIF4_RST_SYSTEM_N 69 +#define R9A07G044_SCI0_RST 70 +#define R9A07G044_SCI1_RST 71 +#define R9A07G044_IRDA_RST 72 +#define R9A07G044_RSPI0_RST 73 +#define R9A07G044_RSPI1_RST 74 +#define R9A07G044_RSPI2_RST 75 +#define R9A07G044_CANFD_RSTP_N 76 +#define R9A07G044_CANFD_RSTC_N 77 +#define R9A07G044_GPIO_RSTN 78 +#define R9A07G044_GPIO_PORT_RESETN 79 +#define R9A07G044_GPIO_SPARE_RESETN 80 +#define R9A07G044_ADC_PRESETN 81 +#define R9A07G044_ADC_ADRST_N 82 +#define R9A07G044_TSU_PRESETN 83 #endif /* __DT_BINDINGS_CLOCK_R9A07G044_CPG_H__ */ From 2e2832562c877e6530b8480982d99a4ff90c6777 Mon Sep 17 00:00:00 2001 From: Alan Young Date: Fri, 9 Jul 2021 09:48:54 +0100 Subject: [PATCH 219/714] ALSA: pcm: Call substream ack() method upon compat mmap commit If a 32-bit application is being used with a 64-bit kernel and is using the mmap mechanism to write data, then the SNDRV_PCM_IOCTL_SYNC_PTR ioctl results in calling snd_pcm_ioctl_sync_ptr_compat(). Make this use pcm_lib_apply_appl_ptr() so that the substream's ack() method, if defined, is called. The snd_pcm_sync_ptr() function, used in the 64-bit ioctl case, already uses snd_pcm_ioctl_sync_ptr_compat(). Fixes: 9027c4639ef1 ("ALSA: pcm: Call ack() whenever appl_ptr is updated") Signed-off-by: Alan Young Cc: Link: https://lore.kernel.org/r/c441f18c-eb2a-3bdd-299a-696ccca2de9c@gmail.com Signed-off-by: Takashi Iwai --- sound/core/pcm_native.c | 11 ++++++++--- 1 file changed, 8 insertions(+), 3 deletions(-) diff --git a/sound/core/pcm_native.c b/sound/core/pcm_native.c index 14e32825c3395f..c88c4316c417d9 100644 --- a/sound/core/pcm_native.c +++ b/sound/core/pcm_native.c @@ -3063,9 +3063,14 @@ static int snd_pcm_ioctl_sync_ptr_compat(struct snd_pcm_substream *substream, boundary = 0x7fffffff; snd_pcm_stream_lock_irq(substream); /* FIXME: we should consider the boundary for the sync from app */ - if (!(sflags & SNDRV_PCM_SYNC_PTR_APPL)) - control->appl_ptr = scontrol.appl_ptr; - else + if (!(sflags & SNDRV_PCM_SYNC_PTR_APPL)) { + err = pcm_lib_apply_appl_ptr(substream, + scontrol.appl_ptr); + if (err < 0) { + snd_pcm_stream_unlock_irq(substream); + return err; + } + } else scontrol.appl_ptr = control->appl_ptr % boundary; if (!(sflags & SNDRV_PCM_SYNC_PTR_AVAIL_MIN)) control->avail_min = scontrol.avail_min; From c71f78a662611fe2c67f3155da19b0eff0f29762 Mon Sep 17 00:00:00 2001 From: Maxim Schwalm Date: Mon, 12 Jul 2021 03:50:11 +0300 Subject: [PATCH 220/714] ASoC: rt5631: Fix regcache sync errors on resume The ALC5631 does not like multi-write accesses, avoid them. This fixes: rt5631 4-001a: Unable to sync registers 0x3a-0x3c. -121 errors on resume from suspend (and all registers after the registers in the error not being synced). Inspired by commit 2d30e9494f1e ("ASoC: rt5651: Fix regcache sync errors on resume") from Hans de Geode, which fixed the same errors on ALC5651. Signed-off-by: Maxim Schwalm Link: https://lore.kernel.org/r/20210712005011.28536-1-digetx@gmail.com Signed-off-by: Mark Brown --- sound/soc/codecs/rt5631.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/sound/soc/codecs/rt5631.c b/sound/soc/codecs/rt5631.c index 3000bc128b5bcb..38356ea2bd6ef3 100644 --- a/sound/soc/codecs/rt5631.c +++ b/sound/soc/codecs/rt5631.c @@ -1695,6 +1695,8 @@ static const struct regmap_config rt5631_regmap_config = { .reg_defaults = rt5631_reg, .num_reg_defaults = ARRAY_SIZE(rt5631_reg), .cache_type = REGCACHE_RBTREE, + .use_single_read = true, + .use_single_write = true, }; static int rt5631_i2c_probe(struct i2c_client *i2c, From ffe000217c5068c5da07ccb1c0f8cce7ad767435 Mon Sep 17 00:00:00 2001 From: Jason Ekstrand Date: Thu, 24 Jun 2021 12:47:32 -0500 Subject: [PATCH 221/714] dma-buf/sync_file: Don't leak fences on merge failure MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Each add_fence() call does a dma_fence_get() on the relevant fence. In the error path, we weren't calling dma_fence_put() so all those fences got leaked. Also, in the krealloc_array failure case, we weren't freeing the fences array. Instead, ensure that i and fences are always zero-initialized and dma_fence_put() all the fences and kfree(fences) on every error path. Signed-off-by: Jason Ekstrand Reviewed-by: Christian König Fixes: a02b9dc90d84 ("dma-buf/sync_file: refactor fence storage in struct sync_file") Cc: Gustavo Padovan Cc: Christian König Link: https://patchwork.freedesktop.org/patch/msgid/20210624174732.1754546-1-jason@jlekstrand.net Signed-off-by: Christian König --- drivers/dma-buf/sync_file.c | 13 +++++++------ 1 file changed, 7 insertions(+), 6 deletions(-) diff --git a/drivers/dma-buf/sync_file.c b/drivers/dma-buf/sync_file.c index 20d9bddbb985bf..394e6e1e968604 100644 --- a/drivers/dma-buf/sync_file.c +++ b/drivers/dma-buf/sync_file.c @@ -211,8 +211,8 @@ static struct sync_file *sync_file_merge(const char *name, struct sync_file *a, struct sync_file *b) { struct sync_file *sync_file; - struct dma_fence **fences, **nfences, **a_fences, **b_fences; - int i, i_a, i_b, num_fences, a_num_fences, b_num_fences; + struct dma_fence **fences = NULL, **nfences, **a_fences, **b_fences; + int i = 0, i_a, i_b, num_fences, a_num_fences, b_num_fences; sync_file = sync_file_alloc(); if (!sync_file) @@ -236,7 +236,7 @@ static struct sync_file *sync_file_merge(const char *name, struct sync_file *a, * If a sync_file can only be created with sync_file_merge * and sync_file_create, this is a reasonable assumption. */ - for (i = i_a = i_b = 0; i_a < a_num_fences && i_b < b_num_fences; ) { + for (i_a = i_b = 0; i_a < a_num_fences && i_b < b_num_fences; ) { struct dma_fence *pt_a = a_fences[i_a]; struct dma_fence *pt_b = b_fences[i_b]; @@ -277,15 +277,16 @@ static struct sync_file *sync_file_merge(const char *name, struct sync_file *a, fences = nfences; } - if (sync_file_set_fence(sync_file, fences, i) < 0) { - kfree(fences); + if (sync_file_set_fence(sync_file, fences, i) < 0) goto err; - } strlcpy(sync_file->user_name, name, sizeof(sync_file->user_name)); return sync_file; err: + while (i) + dma_fence_put(fences[--i]); + kfree(fences); fput(sync_file->file); return NULL; From 98f7cd23aa9563c06503991a0cd41f0cacc99f5f Mon Sep 17 00:00:00 2001 From: Sven Schnelle Date: Fri, 9 Jul 2021 16:49:43 +0200 Subject: [PATCH 222/714] s390/vdso32: add vdso32.lds to targets This fixes a permanent rebuild of the 32 bit vdso. The RPM build process was first calling 'make bzImage' and 'make modules' as a second step. This caused a recompilation of vdso32.so, which in turn also changed the build-id of vmlinux. Fixes: 779df2248739 ("s390/vdso: add minimal compat vdso") Signed-off-by: Sven Schnelle Reviewed-by: Alexander Gordeev Signed-off-by: Heiko Carstens --- arch/s390/kernel/vdso32/Makefile | 1 + 1 file changed, 1 insertion(+) diff --git a/arch/s390/kernel/vdso32/Makefile b/arch/s390/kernel/vdso32/Makefile index b2349a3f4fa305..3457dcf1039653 100644 --- a/arch/s390/kernel/vdso32/Makefile +++ b/arch/s390/kernel/vdso32/Makefile @@ -29,6 +29,7 @@ $(targets:%=$(obj)/%.dbg): KBUILD_CFLAGS = $(KBUILD_CFLAGS_32) $(targets:%=$(obj)/%.dbg): KBUILD_AFLAGS = $(KBUILD_AFLAGS_32) obj-y += vdso32_wrapper.o +targets += vdso32.lds CPPFLAGS_vdso32.lds += -P -C -U$(ARCH) # Disable gcov profiling, ubsan and kasan for VDSO code From c30e5e9ff0c695a8bac813ff4d5216fd7fb51e4e Mon Sep 17 00:00:00 2001 From: Christian Borntraeger Date: Thu, 8 Jul 2021 11:41:21 +0000 Subject: [PATCH 223/714] s390/defconfig: allow early device mapper disks doing make install on an Ubuntu that is installed on an LVM will fail to boot. Turns out that defconfig misses 2 device mapper related configs for the Ubuntu initramfs. Signed-off-by: Christian Borntraeger Reviewed-by: Steffen Maier Signed-off-by: Heiko Carstens --- arch/s390/configs/debug_defconfig | 3 ++- arch/s390/configs/defconfig | 3 ++- 2 files changed, 4 insertions(+), 2 deletions(-) diff --git a/arch/s390/configs/debug_defconfig b/arch/s390/configs/debug_defconfig index 86afcc6b56bf74..55cb846cda37d4 100644 --- a/arch/s390/configs/debug_defconfig +++ b/arch/s390/configs/debug_defconfig @@ -436,7 +436,7 @@ CONFIG_MD_MULTIPATH=m CONFIG_MD_FAULTY=m CONFIG_MD_CLUSTER=m CONFIG_BCACHE=m -CONFIG_BLK_DEV_DM=m +CONFIG_BLK_DEV_DM=y CONFIG_DM_UNSTRIPED=m CONFIG_DM_CRYPT=m CONFIG_DM_SNAPSHOT=m @@ -453,6 +453,7 @@ CONFIG_DM_MULTIPATH_ST=m CONFIG_DM_MULTIPATH_HST=m CONFIG_DM_MULTIPATH_IOA=m CONFIG_DM_DELAY=m +CONFIG_DM_INIT=y CONFIG_DM_UEVENT=y CONFIG_DM_FLAKEY=m CONFIG_DM_VERITY=m diff --git a/arch/s390/configs/defconfig b/arch/s390/configs/defconfig index 71b49ea5b0583e..5d847ab5feaa68 100644 --- a/arch/s390/configs/defconfig +++ b/arch/s390/configs/defconfig @@ -427,7 +427,7 @@ CONFIG_MD_MULTIPATH=m CONFIG_MD_FAULTY=m CONFIG_MD_CLUSTER=m CONFIG_BCACHE=m -CONFIG_BLK_DEV_DM=m +CONFIG_BLK_DEV_DM=y CONFIG_DM_UNSTRIPED=m CONFIG_DM_CRYPT=m CONFIG_DM_SNAPSHOT=m @@ -444,6 +444,7 @@ CONFIG_DM_MULTIPATH_ST=m CONFIG_DM_MULTIPATH_HST=m CONFIG_DM_MULTIPATH_IOA=m CONFIG_DM_DELAY=m +CONFIG_DM_INIT=y CONFIG_DM_UEVENT=y CONFIG_DM_FLAKEY=m CONFIG_DM_VERITY=m From 5f34b1eb2f8d4bba7d6352e767ef84bee9096d97 Mon Sep 17 00:00:00 2001 From: Mark Rutland Date: Mon, 12 Jul 2021 10:00:43 +0100 Subject: [PATCH 224/714] arm64: fix strlen() with CONFIG_KASAN_HW_TAGS When the kernel is built with CONFIG_KASAN_HW_TAGS and the CPU supports MTE, memory accesses are checked at 16-byte granularity, and out-of-bounds accesses can result in tag check faults. Our current implementation of strlen() makes unaligned 16-byte accesses (within a naturally aligned 4096-byte window), and can trigger tag check faults. This can be seen at boot time, e.g. | BUG: KASAN: invalid-access in __pi_strlen+0x14/0x150 | Read at addr f4ff0000c0028300 by task swapper/0/0 | Pointer tag: [f4], memory tag: [fe] | | CPU: 0 PID: 0 Comm: swapper/0 Not tainted 5.13.0-09550-g03c2813535a2-dirty #20 | Hardware name: linux,dummy-virt (DT) | Call trace: | dump_backtrace+0x0/0x1b0 | show_stack+0x1c/0x30 | dump_stack_lvl+0x68/0x84 | print_address_description+0x7c/0x2b4 | kasan_report+0x138/0x38c | __do_kernel_fault+0x190/0x1c4 | do_tag_check_fault+0x78/0x90 | do_mem_abort+0x44/0xb4 | el1_abort+0x40/0x60 | el1h_64_sync_handler+0xb0/0xd0 | el1h_64_sync+0x78/0x7c | __pi_strlen+0x14/0x150 | __register_sysctl_table+0x7c4/0x890 | register_leaf_sysctl_tables+0x1a4/0x210 | register_leaf_sysctl_tables+0xc8/0x210 | __register_sysctl_paths+0x22c/0x290 | register_sysctl_table+0x2c/0x40 | sysctl_init+0x20/0x30 | proc_sys_init+0x3c/0x48 | proc_root_init+0x80/0x9c | start_kernel+0x640/0x69c | __primary_switched+0xc0/0xc8 To fix this, we can reduce the (strlen-internal) MIN_PAGE_SIZE to 16 bytes when CONFIG_KASAN_HW_TAGS is selected. This will cause strlen() to align the base pointer downwards to a 16-byte boundary, and to discard the additional prefix bytes without counting them. All subsequent accesses will be 16-byte aligned 16-byte LDPs. While the comments say the body of the loop will access 32 bytes, this is performed as two 16-byte acceses, with the second made only if the first did not encounter a NUL byte, so the body of the loop will not over-read across a 16-byte boundary. No other string routines are affected. The other str*() routines will not make any access which straddles a 16-byte boundary, and the mem*() routines will only make acceses which straddle a 16-byte boundary when which is entirely within the bounds of the relevant base and size arguments. Fixes: 325a1de81287 ("arm64: Import updated version of Cortex Strings' strlen") Signed-off-by: Mark Rutland Cc: Alexander Potapenko Cc: Andrey Ryabinin Cc: Catalin Marinas Cc: Dmitry Vyukov Cc: Marco Elver Cc: Robin Murphy Cc: Will Deacon Reviewed-by: Catalin Marinas Reviewed-by: Robin Murphy Link: https://lore.kernel.org/r/20210712090043.20847-1-mark.rutland@arm.com Signed-off-by: Will Deacon --- arch/arm64/lib/strlen.S | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/arch/arm64/lib/strlen.S b/arch/arm64/lib/strlen.S index 35fbdb7d6e1a6e..1648790e91b3ce 100644 --- a/arch/arm64/lib/strlen.S +++ b/arch/arm64/lib/strlen.S @@ -8,6 +8,7 @@ #include #include +#include /* Assumptions: * @@ -42,7 +43,16 @@ #define REP8_7f 0x7f7f7f7f7f7f7f7f #define REP8_80 0x8080808080808080 +/* + * When KASAN_HW_TAGS is in use, memory is checked at MTE_GRANULE_SIZE + * (16-byte) granularity, and we must ensure that no access straddles this + * alignment boundary. + */ +#ifdef CONFIG_KASAN_HW_TAGS +#define MIN_PAGE_SIZE MTE_GRANULE_SIZE +#else #define MIN_PAGE_SIZE 4096 +#endif /* Since strings are short on average, we check the first 16 bytes of the string for a NUL character. In order to do an unaligned ldp From e62e074814862cffd8e60a1bdf52d6b592a03675 Mon Sep 17 00:00:00 2001 From: Carlos Bilbao Date: Thu, 8 Jul 2021 07:15:42 -0400 Subject: [PATCH 225/714] arm64: Add missing header in two files Add missing header on include/asm/smp_plat.h, as it calls function cpu_logical_map(). Also include it on kernel/cpufeature.c since it has calls to functions cpu_panic_kernel() and cpu_die_early(). Both files call functions defined on this header, make the header dependencies less fragile. Signed-off-by: Carlos Bilbao Acked-by: Mark Rutland Link: https://lore.kernel.org/r/4325940.LvFx2qVVIh@iron-maiden Signed-off-by: Will Deacon --- arch/arm64/include/asm/smp_plat.h | 1 + arch/arm64/kernel/cpufeature.c | 1 + 2 files changed, 2 insertions(+) diff --git a/arch/arm64/include/asm/smp_plat.h b/arch/arm64/include/asm/smp_plat.h index 99ad77df8f525d..97ddc6c203b7db 100644 --- a/arch/arm64/include/asm/smp_plat.h +++ b/arch/arm64/include/asm/smp_plat.h @@ -10,6 +10,7 @@ #include +#include #include struct mpidr_hash { diff --git a/arch/arm64/kernel/cpufeature.c b/arch/arm64/kernel/cpufeature.c index 125d5c9471ac5d..0ead8bfedf201f 100644 --- a/arch/arm64/kernel/cpufeature.c +++ b/arch/arm64/kernel/cpufeature.c @@ -81,6 +81,7 @@ #include #include #include +#include #include #include #include From c1132702c71f4b95db9435bac5fdc912881563e0 Mon Sep 17 00:00:00 2001 From: Will Deacon Date: Mon, 12 Jul 2021 13:10:00 +0100 Subject: [PATCH 226/714] Revert "arm64: cache: Lower ARCH_DMA_MINALIGN to 64 (L1_CACHE_BYTES)" This reverts commit 65688d2a05deb9f0671a7e2301eadbfe7e27c9e9. Unfortunately, the original Qualcomm Kryo cores integrated into the MSM8996 SoC feature an L2 cache with 128-byte lines which sits above the Point of Coherency. Consequently, we must restore ARCH_DMA_MINALIGN to its former ugly self so that non-coherent DMA can be performed safely on devices built using this SoC. Thanks to Jeffrey Hugo for confirming this with a hardware designer. Link: https://lore.kernel.org/r/CAOCk7NqdpUZFMSXfGjw0_1NaSK5gyTLgpS9kSdZn1jmBy-QkfA@mail.gmail.com/ Reported-by: Yassine Oudjana Link: https://lore.kernel.org/r/uHgsRacR8hJ7nW-I-pIcehzg-lNIn7NJvaL7bP9tfAftFsBjsgaY2qTjG9zyBgxHkjNL1WPNrD7YVv2JVD2_Wy-a5VTbcq-1xEi8ZnwrXBo=@protonmail.com Signed-off-by: Will Deacon --- arch/arm64/include/asm/cache.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/arm64/include/asm/cache.h b/arch/arm64/include/asm/cache.h index a9c0716e744056..a074459f8f2fb7 100644 --- a/arch/arm64/include/asm/cache.h +++ b/arch/arm64/include/asm/cache.h @@ -47,7 +47,7 @@ * cache before the transfer is done, causing old data to be seen by * the CPU. */ -#define ARCH_DMA_MINALIGN L1_CACHE_BYTES +#define ARCH_DMA_MINALIGN (128) #ifdef CONFIG_KASAN_SW_TAGS #define ARCH_SLAB_MINALIGN (1ULL << KASAN_SHADOW_SCALE_SHIFT) From 38e0c99249f8f12e1450234a0f7fb357a1b73843 Mon Sep 17 00:00:00 2001 From: Sudeep Holla Date: Thu, 24 Jun 2021 10:50:58 +0100 Subject: [PATCH 227/714] firmware: arm_scmi: Simplify device probe function on the bus MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit When the driver core calls the probe callback it already checked that the devices match, so there is no need to call the match callback again. Link: https://lore.kernel.org/r/20210624095059.4010157-1-sudeep.holla@arm.com Suggested-by: Uwe Kleine-König Tested-by: Cristian Marussi Reviewed-by: Cristian Marussi Acked-by: Uwe Kleine-König Signed-off-by: Sudeep Holla --- drivers/firmware/arm_scmi/bus.c | 5 ----- 1 file changed, 5 deletions(-) diff --git a/drivers/firmware/arm_scmi/bus.c b/drivers/firmware/arm_scmi/bus.c index 784cf0027da3c5..dc113ad37ad90b 100644 --- a/drivers/firmware/arm_scmi/bus.c +++ b/drivers/firmware/arm_scmi/bus.c @@ -104,11 +104,6 @@ static int scmi_dev_probe(struct device *dev) { struct scmi_driver *scmi_drv = to_scmi_driver(dev->driver); struct scmi_device *scmi_dev = to_scmi_dev(dev); - const struct scmi_device_id *id; - - id = scmi_dev_match_id(scmi_dev, scmi_drv); - if (!id) - return -ENODEV; if (!scmi_dev->handle) return -EPROBE_DEFER; From 5e469dac326555d2038d199a6329458cc82a34e5 Mon Sep 17 00:00:00 2001 From: Sudeep Holla Date: Thu, 24 Jun 2021 10:50:59 +0100 Subject: [PATCH 228/714] firmware: arm_scmi: Ensure drivers provide a probe function MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The bus probe callback calls the driver callback without further checking. Better be safe than sorry and refuse registration of a driver without a probe function to prevent a NULL pointer exception. Link: https://lore.kernel.org/r/20210624095059.4010157-2-sudeep.holla@arm.com Fixes: 933c504424a2 ("firmware: arm_scmi: add scmi protocol bus to enumerate protocol devices") Reported-by: Uwe Kleine-König Tested-by: Cristian Marussi Reviewed-by: Cristian Marussi Acked-by: Uwe Kleine-König Signed-off-by: Sudeep Holla --- drivers/firmware/arm_scmi/bus.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/firmware/arm_scmi/bus.c b/drivers/firmware/arm_scmi/bus.c index dc113ad37ad90b..6c7e24935eca58 100644 --- a/drivers/firmware/arm_scmi/bus.c +++ b/drivers/firmware/arm_scmi/bus.c @@ -134,6 +134,9 @@ int scmi_driver_register(struct scmi_driver *driver, struct module *owner, { int retval; + if (!driver->probe) + return -EINVAL; + retval = scmi_protocol_device_request(driver->id_table); if (retval) return retval; From 7a691f16ccad05d770f813d9c4b4337a30c6d63f Mon Sep 17 00:00:00 2001 From: Sudeep Holla Date: Wed, 7 Jul 2021 14:50:28 +0100 Subject: [PATCH 229/714] firmware: arm_scmi: Fix possible scmi_linux_errmap buffer overflow The scmi_linux_errmap buffer access index is supposed to depend on the array size to prevent element out of bounds access. It uses SCMI_ERR_MAX to check bounds but that can mismatch with the array size. It also changes the success into -EIO though scmi_linux_errmap is never used in case of success, it is expected to work for success case too. It is slightly confusing code as the negative of the error code is used as index to the buffer. Fix it by negating it at the start and make it more readable. Link: https://lore.kernel.org/r/20210707135028.1869642-1-sudeep.holla@arm.com Reported-by: kernel test robot Reported-by: Dan Carpenter Reviewed-by: Cristian Marussi Signed-off-by: Sudeep Holla --- drivers/firmware/arm_scmi/driver.c | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/drivers/firmware/arm_scmi/driver.c b/drivers/firmware/arm_scmi/driver.c index 66e5e694be7d4d..36d80661d473d1 100644 --- a/drivers/firmware/arm_scmi/driver.c +++ b/drivers/firmware/arm_scmi/driver.c @@ -47,7 +47,6 @@ enum scmi_error_codes { SCMI_ERR_GENERIC = -8, /* Generic Error */ SCMI_ERR_HARDWARE = -9, /* Hardware Error */ SCMI_ERR_PROTOCOL = -10,/* Protocol Error */ - SCMI_ERR_MAX }; /* List of all SCMI devices active in system */ @@ -166,8 +165,10 @@ static const int scmi_linux_errmap[] = { static inline int scmi_to_linux_errno(int errno) { - if (errno < SCMI_SUCCESS && errno > SCMI_ERR_MAX) - return scmi_linux_errmap[-errno]; + int err_idx = -errno; + + if (err_idx >= SCMI_SUCCESS && err_idx < ARRAY_SIZE(scmi_linux_errmap)) + return scmi_linux_errmap[err_idx]; return -EIO; } From 92743071464fca5acbbe812d9a0d88de3eaaad36 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Uwe=20Kleine-K=C3=B6nig?= Date: Mon, 21 Jun 2021 22:16:51 +0200 Subject: [PATCH 230/714] firmware: arm_ffa: Ensure drivers provide a probe function MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The bus probe callback calls the driver callback without further checking. Better be safe than sorry and refuse registration of a driver without a probe function to prevent a NULL pointer exception. Link: https://lore.kernel.org/r/20210621201652.127611-1-u.kleine-koenig@pengutronix.de Fixes: e781858488b9 ("firmware: arm_ffa: Add initial FFA bus support for device enumeration") Signed-off-by: Uwe Kleine-König Signed-off-by: Sudeep Holla --- drivers/firmware/arm_ffa/bus.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/firmware/arm_ffa/bus.c b/drivers/firmware/arm_ffa/bus.c index 83166e02b19184..d2cc24319626bd 100644 --- a/drivers/firmware/arm_ffa/bus.c +++ b/drivers/firmware/arm_ffa/bus.c @@ -99,6 +99,9 @@ int ffa_driver_register(struct ffa_driver *driver, struct module *owner, { int ret; + if (!driver->probe) + return -EINVAL; + driver->driver.bus = &ffa_bus_type; driver->driver.name = driver->name; driver->driver.owner = owner; From e362547addc39e4bb18ad5bdfd59ce4d512d0c08 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Uwe=20Kleine-K=C3=B6nig?= Date: Mon, 21 Jun 2021 22:16:52 +0200 Subject: [PATCH 231/714] firmware: arm_ffa: Simplify probe function MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit When the driver core calls the probe callback it already checked that the devices match, so there is no need to call the match callback again. Link: https://lore.kernel.org/r/20210621201652.127611-2-u.kleine-koenig@pengutronix.de Signed-off-by: Uwe Kleine-König Signed-off-by: Sudeep Holla --- drivers/firmware/arm_ffa/bus.c | 3 --- 1 file changed, 3 deletions(-) diff --git a/drivers/firmware/arm_ffa/bus.c b/drivers/firmware/arm_ffa/bus.c index d2cc24319626bd..00fe595a5bc897 100644 --- a/drivers/firmware/arm_ffa/bus.c +++ b/drivers/firmware/arm_ffa/bus.c @@ -46,9 +46,6 @@ static int ffa_device_probe(struct device *dev) struct ffa_driver *ffa_drv = to_ffa_driver(dev->driver); struct ffa_device *ffa_dev = to_ffa_dev(dev); - if (!ffa_device_match(dev, dev->driver)) - return -ENODEV; - return ffa_drv->probe(ffa_dev); } From ba684a31d3626c86cd9097e12d6ed57d224d077d Mon Sep 17 00:00:00 2001 From: Sudeep Holla Date: Tue, 22 Jun 2021 17:22:02 +0100 Subject: [PATCH 232/714] firmware: arm_ffa: Fix the comment style clang produces the following warning: drivers/firmware/arm_ffa/driver.c:123: warning: expecting prototype for FF(). Prototype was for FFA_PAGE_SIZE() instead This comment starts with '/**', but isn't a kernel-doc comment. Refer Documentation/doc-guide/kernel-doc.rst Fix the same by removing the kernel-doc style comment here. Link: https://lore.kernel.org/r/20210622162202.3485866-1-sudeep.holla@arm.com Reported-by: kernel test robot Signed-off-by: Sudeep Holla --- drivers/firmware/arm_ffa/driver.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/firmware/arm_ffa/driver.c b/drivers/firmware/arm_ffa/driver.c index b1edb4b2e94aaf..88b822575ac412 100644 --- a/drivers/firmware/arm_ffa/driver.c +++ b/drivers/firmware/arm_ffa/driver.c @@ -120,7 +120,7 @@ #define PACK_TARGET_INFO(s, r) \ (FIELD_PREP(SENDER_ID_MASK, (s)) | FIELD_PREP(RECEIVER_ID_MASK, (r))) -/** +/* * FF-A specification mentions explicitly about '4K pages'. This should * not be confused with the kernel PAGE_SIZE, which is the translation * granule kernel is configured and may be one among 4K, 16K and 64K. From dd925db6f07556061c11ab1fbfa4a0145ae6b438 Mon Sep 17 00:00:00 2001 From: Sudeep Holla Date: Wed, 7 Jul 2021 14:47:39 +0100 Subject: [PATCH 233/714] firmware: arm_ffa: Fix a possible ffa_linux_errmap buffer overflow The ffa_linux_errmap buffer access index is supposed to range from 0-8 but it ranges from 1-9 instead. It reads one element out of bounds. It also changes the success into -EINVAL though ffa_to_linux_errno is never used in case of success, it is expected to work for success case too. It is slightly confusing code as the negative of the error code is used as index to the buffer. Fix it by negating it at the start and make it more readable. Link: https://lore.kernel.org/r/20210707134739.1869481-1-sudeep.holla@arm.com Reported-by: kernel test robot Reported-by: Dan Carpenter Signed-off-by: Sudeep Holla --- drivers/firmware/arm_ffa/driver.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/drivers/firmware/arm_ffa/driver.c b/drivers/firmware/arm_ffa/driver.c index 88b822575ac412..c9fb56afbcb494 100644 --- a/drivers/firmware/arm_ffa/driver.c +++ b/drivers/firmware/arm_ffa/driver.c @@ -149,8 +149,10 @@ static const int ffa_linux_errmap[] = { static inline int ffa_to_linux_errno(int errno) { - if (errno < FFA_RET_SUCCESS && errno >= -ARRAY_SIZE(ffa_linux_errmap)) - return ffa_linux_errmap[-errno]; + int err_idx = -errno; + + if (err_idx >= 0 && err_idx < ARRAY_SIZE(ffa_linux_errmap)) + return ffa_linux_errmap[err_idx]; return -EINVAL; } From f35e0cc25280cb0063b0e4481f99268fbd872ff3 Mon Sep 17 00:00:00 2001 From: Baruch Siach Date: Tue, 6 Jul 2021 08:44:00 +0300 Subject: [PATCH 234/714] doc, af_xdp: Fix bind flags option typo Fix XDP_ZERO_COPY flag typo since it is actually named XDP_ZEROCOPY instead as per if_xdp.h uapi header. Signed-off-by: Baruch Siach Signed-off-by: Daniel Borkmann Acked-by: Magnus Karlsson Link: https://lore.kernel.org/bpf/1656fdf94704e9e735df0f8b97667d8f26dd098b.1625550240.git.baruch@tkos.co.il --- Documentation/networking/af_xdp.rst | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/Documentation/networking/af_xdp.rst b/Documentation/networking/af_xdp.rst index 42576880aa4a1b..60b217b436be66 100644 --- a/Documentation/networking/af_xdp.rst +++ b/Documentation/networking/af_xdp.rst @@ -243,8 +243,8 @@ Configuration Flags and Socket Options These are the various configuration flags that can be used to control and monitor the behavior of AF_XDP sockets. -XDP_COPY and XDP_ZERO_COPY bind flags -------------------------------------- +XDP_COPY and XDP_ZEROCOPY bind flags +------------------------------------ When you bind to a socket, the kernel will first try to use zero-copy copy. If zero-copy is not supported, it will fall back on using copy @@ -252,7 +252,7 @@ mode, i.e. copying all packets out to user space. But if you would like to force a certain mode, you can use the following flags. If you pass the XDP_COPY flag to the bind call, the kernel will force the socket into copy mode. If it cannot use copy mode, the bind call will -fail with an error. Conversely, the XDP_ZERO_COPY flag will force the +fail with an error. Conversely, the XDP_ZEROCOPY flag will force the socket into zero-copy mode or fail. XDP_SHARED_UMEM bind flag From 5e21bb4e812566aef86fbb77c96a4ec0782286e4 Mon Sep 17 00:00:00 2001 From: Xuan Zhuo Date: Thu, 8 Jul 2021 16:04:09 +0800 Subject: [PATCH 235/714] bpf, test: fix NULL pointer dereference on invalid expected_attach_type These two types of XDP progs (BPF_XDP_DEVMAP, BPF_XDP_CPUMAP) will not be executed directly in the driver, therefore we should also not directly run them from here. To run in these two situations, there must be further preparations done, otherwise these may cause a kernel panic. For more details, see also dev_xdp_attach(). [ 46.982479] BUG: kernel NULL pointer dereference, address: 0000000000000000 [ 46.984295] #PF: supervisor read access in kernel mode [ 46.985777] #PF: error_code(0x0000) - not-present page [ 46.987227] PGD 800000010dca4067 P4D 800000010dca4067 PUD 10dca6067 PMD 0 [ 46.989201] Oops: 0000 [#1] SMP PTI [ 46.990304] CPU: 7 PID: 562 Comm: a.out Not tainted 5.13.0+ #44 [ 46.992001] Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS rel-1.14.0-0-g155821a1990b-prebuilt.qemu.org 04/01/24 [ 46.995113] RIP: 0010:___bpf_prog_run+0x17b/0x1710 [ 46.996586] Code: 49 03 14 cc e8 76 f6 fe ff e9 ad fe ff ff 0f b6 43 01 48 0f bf 4b 02 48 83 c3 08 89 c2 83 e0 0f c0 ea 04 02 [ 47.001562] RSP: 0018:ffffc900005afc58 EFLAGS: 00010246 [ 47.003115] RAX: 0000000000000000 RBX: ffffc9000023f068 RCX: 0000000000000000 [ 47.005163] RDX: 0000000000000000 RSI: 0000000000000079 RDI: ffffc900005afc98 [ 47.007135] RBP: 0000000000000000 R08: ffffc9000023f048 R09: c0000000ffffdfff [ 47.009171] R10: 0000000000000001 R11: ffffc900005afb40 R12: ffffc900005afc98 [ 47.011172] R13: 0000000000000001 R14: 0000000000000001 R15: ffffffff825258a8 [ 47.013244] FS: 00007f04a5207580(0000) GS:ffff88842fdc0000(0000) knlGS:0000000000000000 [ 47.015705] CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 [ 47.017475] CR2: 0000000000000000 CR3: 0000000100182005 CR4: 0000000000770ee0 [ 47.019558] DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000 [ 47.021595] DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400 [ 47.023574] PKRU: 55555554 [ 47.024571] Call Trace: [ 47.025424] __bpf_prog_run32+0x32/0x50 [ 47.026296] ? printk+0x53/0x6a [ 47.027066] ? ktime_get+0x39/0x90 [ 47.027895] bpf_test_run.cold.28+0x23/0x123 [ 47.028866] ? printk+0x53/0x6a [ 47.029630] bpf_prog_test_run_xdp+0x149/0x1d0 [ 47.030649] __sys_bpf+0x1305/0x23d0 [ 47.031482] __x64_sys_bpf+0x17/0x20 [ 47.032316] do_syscall_64+0x3a/0x80 [ 47.033165] entry_SYSCALL_64_after_hwframe+0x44/0xae [ 47.034254] RIP: 0033:0x7f04a51364dd [ 47.035133] Code: 00 c3 66 2e 0f 1f 84 00 00 00 00 00 90 f3 0f 1e fa 48 89 f8 48 89 f7 48 89 d6 48 89 ca 4d 89 c2 4d 89 c8 48 [ 47.038768] RSP: 002b:00007fff8f9fc518 EFLAGS: 00000213 ORIG_RAX: 0000000000000141 [ 47.040344] RAX: ffffffffffffffda RBX: 0000000000000000 RCX: 00007f04a51364dd [ 47.041749] RDX: 0000000000000048 RSI: 0000000020002a80 RDI: 000000000000000a [ 47.043171] RBP: 00007fff8f9fc530 R08: 0000000002049300 R09: 0000000020000100 [ 47.044626] R10: 0000000000000004 R11: 0000000000000213 R12: 0000000000401070 [ 47.046088] R13: 0000000000000000 R14: 0000000000000000 R15: 0000000000000000 [ 47.047579] Modules linked in: [ 47.048318] CR2: 0000000000000000 [ 47.049120] ---[ end trace 7ad34443d5be719a ]--- [ 47.050273] RIP: 0010:___bpf_prog_run+0x17b/0x1710 [ 47.051343] Code: 49 03 14 cc e8 76 f6 fe ff e9 ad fe ff ff 0f b6 43 01 48 0f bf 4b 02 48 83 c3 08 89 c2 83 e0 0f c0 ea 04 02 [ 47.054943] RSP: 0018:ffffc900005afc58 EFLAGS: 00010246 [ 47.056068] RAX: 0000000000000000 RBX: ffffc9000023f068 RCX: 0000000000000000 [ 47.057522] RDX: 0000000000000000 RSI: 0000000000000079 RDI: ffffc900005afc98 [ 47.058961] RBP: 0000000000000000 R08: ffffc9000023f048 R09: c0000000ffffdfff [ 47.060390] R10: 0000000000000001 R11: ffffc900005afb40 R12: ffffc900005afc98 [ 47.061803] R13: 0000000000000001 R14: 0000000000000001 R15: ffffffff825258a8 [ 47.063249] FS: 00007f04a5207580(0000) GS:ffff88842fdc0000(0000) knlGS:0000000000000000 [ 47.065070] CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 [ 47.066307] CR2: 0000000000000000 CR3: 0000000100182005 CR4: 0000000000770ee0 [ 47.067747] DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000 [ 47.069217] DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400 [ 47.070652] PKRU: 55555554 [ 47.071318] Kernel panic - not syncing: Fatal exception [ 47.072854] Kernel Offset: disabled [ 47.073683] ---[ end Kernel panic - not syncing: Fatal exception ]--- Fixes: 9216477449f3 ("bpf: cpumap: Add the possibility to attach an eBPF program to cpumap") Fixes: fbee97feed9b ("bpf: Add support to attach bpf program to a devmap entry") Reported-by: Abaci Signed-off-by: Xuan Zhuo Signed-off-by: Daniel Borkmann Reviewed-by: Dust Li Acked-by: Jesper Dangaard Brouer Acked-by: David Ahern Acked-by: Song Liu Link: https://lore.kernel.org/bpf/20210708080409.73525-1-xuanzhuo@linux.alibaba.com --- net/bpf/test_run.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/net/bpf/test_run.c b/net/bpf/test_run.c index aa47af349ba804..1cc75c811e247b 100644 --- a/net/bpf/test_run.c +++ b/net/bpf/test_run.c @@ -701,6 +701,9 @@ int bpf_prog_test_run_xdp(struct bpf_prog *prog, const union bpf_attr *kattr, void *data; int ret; + if (prog->expected_attach_type == BPF_XDP_DEVMAP || + prog->expected_attach_type == BPF_XDP_CPUMAP) + return -EINVAL; if (kattr->test.ctx_in || kattr->test.ctx_out) return -EINVAL; From 40751c6c9bea6a5cfede7c61ee5f3cb1ab857029 Mon Sep 17 00:00:00 2001 From: Nishanth Menon Date: Wed, 7 Jul 2021 15:48:40 -0500 Subject: [PATCH 236/714] scripts/spdxcheck.py: Strictly read license files in utf-8 Commit bc41a7f36469 ("LICENSES: Add the CC-BY-4.0 license") unfortunately introduced LICENSES/dual/CC-BY-4.0 in UTF-8 Unicode text While python will barf at it with: FAIL: 'ascii' codec can't decode byte 0xe2 in position 2109: ordinal not in range(128) Traceback (most recent call last): File "scripts/spdxcheck.py", line 244, in spdx = read_spdxdata(repo) File "scripts/spdxcheck.py", line 47, in read_spdxdata for l in open(el.path).readlines(): File "/usr/lib/python3.6/encodings/ascii.py", line 26, in decode return codecs.ascii_decode(input, self.errors)[0] UnicodeDecodeError: 'ascii' codec can't decode byte 0xe2 in position 2109: ordinal not in range(128) While it is indeed debatable if 'Licensor.' used in the license file needs unicode quotes, instead, force spdxcheck to read utf-8. Reported-by: Rahul T R Signed-off-by: Nishanth Menon Reviewed-by: Thomas Gleixner Link: https://lore.kernel.org/r/20210707204840.30891-1-nm@ti.com Signed-off-by: Jonathan Corbet --- scripts/spdxcheck.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/scripts/spdxcheck.py b/scripts/spdxcheck.py index 3e784cf9f401fd..ebd06ae642c933 100755 --- a/scripts/spdxcheck.py +++ b/scripts/spdxcheck.py @@ -44,7 +44,7 @@ def read_spdxdata(repo): continue exception = None - for l in open(el.path).readlines(): + for l in open(el.path, encoding="utf-8").readlines(): if l.startswith('Valid-License-Identifier:'): lid = l.split(':')[1].strip().upper() if lid in spdx.licenses: From 1adee589cd6da2ead7f1b5dd82419eac59a2e2b0 Mon Sep 17 00:00:00 2001 From: "Gustavo A. R. Silva" Date: Mon, 12 Jul 2021 11:03:35 -0500 Subject: [PATCH 237/714] kernel: debug: Fix unreachable code in gdb_serial_stub() Fix the following warning: kernel/debug/gdbstub.c:1049:4: warning: fallthrough annotation in unreachable code [-Wimplicit-fallthrough] fallthrough; ^ include/linux/compiler_attributes.h:210:41: note: expanded from macro 'fallthrough' # define fallthrough __attribute__((__fallthrough__) by placing the fallthrough; statement inside ifdeffery. Reported-by: kernel test robot Signed-off-by: Gustavo A. R. Silva --- kernel/debug/gdbstub.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/kernel/debug/gdbstub.c b/kernel/debug/gdbstub.c index 8372897402f4a2..b6f28fad430738 100644 --- a/kernel/debug/gdbstub.c +++ b/kernel/debug/gdbstub.c @@ -1045,8 +1045,8 @@ int gdb_serial_stub(struct kgdb_state *ks) gdb_cmd_detachkill(ks); return DBG_PASS_EVENT; } -#endif fallthrough; +#endif case 'C': /* Exception passing */ tmp = gdb_cmd_exception_pass(ks); if (tmp > 0) From e8865537a68bb3032f449f5eb108fa8cd76ebb6d Mon Sep 17 00:00:00 2001 From: "Gustavo A. R. Silva" Date: Mon, 12 Jul 2021 11:09:13 -0500 Subject: [PATCH 238/714] fcntl: Fix unreachable code in do_fcntl() Fix the following warning: fs/fcntl.c:373:3: warning: fallthrough annotation in unreachable code [-Wimplicit-fallthrough] fallthrough; ^ include/linux/compiler_attributes.h:210:41: note: expanded from macro 'fallthrough' # define fallthrough __attribute__((__fallthrough__)) by placing the fallthrough; statement inside ifdeffery. Reported-by: kernel test robot Signed-off-by: Gustavo A. R. Silva --- fs/fcntl.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/fcntl.c b/fs/fcntl.c index dfc72f15be7fcd..f946bec8f1f1b9 100644 --- a/fs/fcntl.c +++ b/fs/fcntl.c @@ -369,8 +369,8 @@ static long do_fcntl(int fd, unsigned int cmd, unsigned long arg, /* 32-bit arches must use fcntl64() */ case F_OFD_SETLK: case F_OFD_SETLKW: -#endif fallthrough; +#endif case F_SETLK: case F_SETLKW: if (copy_from_user(&flock, argp, sizeof(flock))) From 1eb5f4a3ddd949af1abe947c02ad990c013dd620 Mon Sep 17 00:00:00 2001 From: "Gustavo A. R. Silva" Date: Mon, 12 Jul 2021 11:13:49 -0500 Subject: [PATCH 239/714] mtd: cfi_util: Fix unreachable code issue Fix the following warning: drivers/mtd/chips/cfi_util.c:112:3: warning: fallthrough annotation in unreachable code [-Wimplicit-fallthrough] fallthrough; ^ include/linux/compiler_attributes.h:210:41: note: expanded from macro 'fallthrough' # define fallthrough __attribute__((__fallthrough__)) ^ drivers/mtd/chips/cfi_util.c:168:3: warning: fallthrough annotation in unreachable code [-Wimplicit-fallthrough] fallthrough; ^ include/linux/compiler_attributes.h:210:41: note: expanded from macro 'fallthrough' # define fallthrough __attribute__((__fallthrough__)) by placing the fallthrough; statement inside ifdeffery. Reported-by: kernel test robot Signed-off-by: Gustavo A. R. Silva --- drivers/mtd/chips/cfi_util.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/mtd/chips/cfi_util.c b/drivers/mtd/chips/cfi_util.c index 99b7986002f0f7..6a6a2a21d2edc1 100644 --- a/drivers/mtd/chips/cfi_util.c +++ b/drivers/mtd/chips/cfi_util.c @@ -108,8 +108,8 @@ map_word cfi_build_cmd(u_long cmd, struct map_info *map, struct cfi_private *cfi #if BITS_PER_LONG >= 64 case 8: onecmd |= (onecmd << (chip_mode * 32)); -#endif fallthrough; +#endif case 4: onecmd |= (onecmd << (chip_mode * 16)); fallthrough; @@ -164,8 +164,8 @@ unsigned long cfi_merge_status(map_word val, struct map_info *map, #if BITS_PER_LONG >= 64 case 8: res |= (onestat >> (chip_mode * 32)); -#endif fallthrough; +#endif case 4: res |= (onestat >> (chip_mode * 16)); fallthrough; From a1739c307a072e46473a2ba239eb60e6d711c96c Mon Sep 17 00:00:00 2001 From: Randy Dunlap Date: Sun, 11 Jul 2021 15:31:47 -0700 Subject: [PATCH 240/714] net: hdlc: rename 'mod_init' & 'mod_exit' functions to be module-specific Rename module_init & module_exit functions that are named "mod_init" and "mod_exit" so that they are unique in both the System.map file and in initcall_debug output instead of showing up as almost anonymous "mod_init". This is helpful for debugging and in determining how long certain module_init calls take to execute. Signed-off-by: Randy Dunlap Cc: Krzysztof Halasa Cc: netdev@vger.kernel.org Cc: "David S. Miller" Cc: Jakub Kicinski Cc: Martin Schiller Cc: linux-x25@vger.kernel.org Signed-off-by: David S. Miller --- drivers/net/wan/hdlc_cisco.c | 8 ++++---- drivers/net/wan/hdlc_fr.c | 8 ++++---- drivers/net/wan/hdlc_ppp.c | 8 ++++---- drivers/net/wan/hdlc_raw.c | 8 ++++---- drivers/net/wan/hdlc_raw_eth.c | 8 ++++---- drivers/net/wan/hdlc_x25.c | 8 ++++---- 6 files changed, 24 insertions(+), 24 deletions(-) diff --git a/drivers/net/wan/hdlc_cisco.c b/drivers/net/wan/hdlc_cisco.c index 349ca18088e880..c54fdae950fb71 100644 --- a/drivers/net/wan/hdlc_cisco.c +++ b/drivers/net/wan/hdlc_cisco.c @@ -364,19 +364,19 @@ static int cisco_ioctl(struct net_device *dev, struct ifreq *ifr) return -EINVAL; } -static int __init mod_init(void) +static int __init hdlc_cisco_init(void) { register_hdlc_protocol(&proto); return 0; } -static void __exit mod_exit(void) +static void __exit hdlc_cisco_exit(void) { unregister_hdlc_protocol(&proto); } -module_init(mod_init); -module_exit(mod_exit); +module_init(hdlc_cisco_init); +module_exit(hdlc_cisco_exit); MODULE_AUTHOR("Krzysztof Halasa "); MODULE_DESCRIPTION("Cisco HDLC protocol support for generic HDLC"); diff --git a/drivers/net/wan/hdlc_fr.c b/drivers/net/wan/hdlc_fr.c index 72250fe0a1dff0..25e3564ce1182d 100644 --- a/drivers/net/wan/hdlc_fr.c +++ b/drivers/net/wan/hdlc_fr.c @@ -1279,19 +1279,19 @@ static int fr_ioctl(struct net_device *dev, struct ifreq *ifr) return -EINVAL; } -static int __init mod_init(void) +static int __init hdlc_fr_init(void) { register_hdlc_protocol(&proto); return 0; } -static void __exit mod_exit(void) +static void __exit hdlc_fr_exit(void) { unregister_hdlc_protocol(&proto); } -module_init(mod_init); -module_exit(mod_exit); +module_init(hdlc_fr_init); +module_exit(hdlc_fr_exit); MODULE_AUTHOR("Krzysztof Halasa "); MODULE_DESCRIPTION("Frame-Relay protocol support for generic HDLC"); diff --git a/drivers/net/wan/hdlc_ppp.c b/drivers/net/wan/hdlc_ppp.c index 834be2ae3e9e5b..b81ecf432a0c3b 100644 --- a/drivers/net/wan/hdlc_ppp.c +++ b/drivers/net/wan/hdlc_ppp.c @@ -705,20 +705,20 @@ static int ppp_ioctl(struct net_device *dev, struct ifreq *ifr) return -EINVAL; } -static int __init mod_init(void) +static int __init hdlc_ppp_init(void) { skb_queue_head_init(&tx_queue); register_hdlc_protocol(&proto); return 0; } -static void __exit mod_exit(void) +static void __exit hdlc_ppp_exit(void) { unregister_hdlc_protocol(&proto); } -module_init(mod_init); -module_exit(mod_exit); +module_init(hdlc_ppp_init); +module_exit(hdlc_ppp_exit); MODULE_AUTHOR("Krzysztof Halasa "); MODULE_DESCRIPTION("PPP protocol support for generic HDLC"); diff --git a/drivers/net/wan/hdlc_raw.c b/drivers/net/wan/hdlc_raw.c index 388fcc09b4dd79..54d28496fefdf0 100644 --- a/drivers/net/wan/hdlc_raw.c +++ b/drivers/net/wan/hdlc_raw.c @@ -90,7 +90,7 @@ static int raw_ioctl(struct net_device *dev, struct ifreq *ifr) } -static int __init mod_init(void) +static int __init hdlc_raw_init(void) { register_hdlc_protocol(&proto); return 0; @@ -98,14 +98,14 @@ static int __init mod_init(void) -static void __exit mod_exit(void) +static void __exit hdlc_raw_exit(void) { unregister_hdlc_protocol(&proto); } -module_init(mod_init); -module_exit(mod_exit); +module_init(hdlc_raw_init); +module_exit(hdlc_raw_exit); MODULE_AUTHOR("Krzysztof Halasa "); MODULE_DESCRIPTION("Raw HDLC protocol support for generic HDLC"); diff --git a/drivers/net/wan/hdlc_raw_eth.c b/drivers/net/wan/hdlc_raw_eth.c index c70a518b8b4783..927596276a072f 100644 --- a/drivers/net/wan/hdlc_raw_eth.c +++ b/drivers/net/wan/hdlc_raw_eth.c @@ -110,7 +110,7 @@ static int raw_eth_ioctl(struct net_device *dev, struct ifreq *ifr) } -static int __init mod_init(void) +static int __init hdlc_eth_init(void) { register_hdlc_protocol(&proto); return 0; @@ -118,14 +118,14 @@ static int __init mod_init(void) -static void __exit mod_exit(void) +static void __exit hdlc_eth_exit(void) { unregister_hdlc_protocol(&proto); } -module_init(mod_init); -module_exit(mod_exit); +module_init(hdlc_eth_init); +module_exit(hdlc_eth_exit); MODULE_AUTHOR("Krzysztof Halasa "); MODULE_DESCRIPTION("Ethernet encapsulation support for generic HDLC"); diff --git a/drivers/net/wan/hdlc_x25.c b/drivers/net/wan/hdlc_x25.c index d2bf72bf3bd7c2..9b7ebf8bd85c40 100644 --- a/drivers/net/wan/hdlc_x25.c +++ b/drivers/net/wan/hdlc_x25.c @@ -365,19 +365,19 @@ static int x25_ioctl(struct net_device *dev, struct ifreq *ifr) return -EINVAL; } -static int __init mod_init(void) +static int __init hdlc_x25_init(void) { register_hdlc_protocol(&proto); return 0; } -static void __exit mod_exit(void) +static void __exit hdlc_x25_exit(void) { unregister_hdlc_protocol(&proto); } -module_init(mod_init); -module_exit(mod_exit); +module_init(hdlc_x25_init); +module_exit(hdlc_x25_exit); MODULE_AUTHOR("Krzysztof Halasa "); MODULE_DESCRIPTION("X.25 protocol support for generic HDLC"); From 5796015fa968a3349027a27dcd04c71d95c53ba5 Mon Sep 17 00:00:00 2001 From: Vasily Averin Date: Mon, 12 Jul 2021 09:45:06 +0300 Subject: [PATCH 241/714] ipv6: allocate enough headroom in ip6_finish_output2() When TEE target mirrors traffic to another interface, sk_buff may not have enough headroom to be processed correctly. ip_finish_output2() detect this situation for ipv4 and allocates new skb with enogh headroom. However ipv6 lacks this logic in ip_finish_output2 and it leads to skb_under_panic: skbuff: skb_under_panic: text:ffffffffc0866ad4 len:96 put:24 head:ffff97be85e31800 data:ffff97be85e317f8 tail:0x58 end:0xc0 dev:gre0 ------------[ cut here ]------------ kernel BUG at net/core/skbuff.c:110! invalid opcode: 0000 [#1] SMP PTI CPU: 2 PID: 393 Comm: kworker/2:2 Tainted: G OE 5.13.0 #13 Hardware name: Virtuozzo KVM, BIOS 1.11.0-2.vz7.4 04/01/2014 Workqueue: ipv6_addrconf addrconf_dad_work RIP: 0010:skb_panic+0x48/0x4a Call Trace: skb_push.cold.111+0x10/0x10 ipgre_header+0x24/0xf0 [ip_gre] neigh_connected_output+0xae/0xf0 ip6_finish_output2+0x1a8/0x5a0 ip6_output+0x5c/0x110 nf_dup_ipv6+0x158/0x1000 [nf_dup_ipv6] tee_tg6+0x2e/0x40 [xt_TEE] ip6t_do_table+0x294/0x470 [ip6_tables] nf_hook_slow+0x44/0xc0 nf_hook.constprop.34+0x72/0xe0 ndisc_send_skb+0x20d/0x2e0 ndisc_send_ns+0xd1/0x210 addrconf_dad_work+0x3c8/0x540 process_one_work+0x1d1/0x370 worker_thread+0x30/0x390 kthread+0x116/0x130 ret_from_fork+0x22/0x30 Signed-off-by: Vasily Averin Signed-off-by: David S. Miller --- net/ipv6/ip6_output.c | 28 ++++++++++++++++++++++++++++ 1 file changed, 28 insertions(+) diff --git a/net/ipv6/ip6_output.c b/net/ipv6/ip6_output.c index d4ee2169afd816..01bea76e3891c2 100644 --- a/net/ipv6/ip6_output.c +++ b/net/ipv6/ip6_output.c @@ -60,10 +60,38 @@ static int ip6_finish_output2(struct net *net, struct sock *sk, struct sk_buff * { struct dst_entry *dst = skb_dst(skb); struct net_device *dev = dst->dev; + unsigned int hh_len = LL_RESERVED_SPACE(dev); + int delta = hh_len - skb_headroom(skb); const struct in6_addr *nexthop; struct neighbour *neigh; int ret; + /* Be paranoid, rather than too clever. */ + if (unlikely(delta > 0) && dev->header_ops) { + /* pskb_expand_head() might crash, if skb is shared */ + if (skb_shared(skb)) { + struct sk_buff *nskb = skb_clone(skb, GFP_ATOMIC); + + if (likely(nskb)) { + if (skb->sk) + skb_set_owner_w(skb, skb->sk); + consume_skb(skb); + } else { + kfree_skb(skb); + } + skb = nskb; + } + if (skb && + pskb_expand_head(skb, SKB_DATA_ALIGN(delta), 0, GFP_ATOMIC)) { + kfree_skb(skb); + skb = NULL; + } + if (!skb) { + IP6_INC_STATS(net, ip6_dst_idev(dst), IPSTATS_MIB_OUTDISCARDS); + return -ENOMEM; + } + } + if (ipv6_addr_is_multicast(&ipv6_hdr(skb)->daddr)) { struct inet6_dev *idev = ip6_dst_idev(skb_dst(skb)); From 71ce9d92fc7089f287c3e95a981bdec7545a8588 Mon Sep 17 00:00:00 2001 From: Colin Ian King Date: Mon, 12 Jul 2021 15:37:50 +0100 Subject: [PATCH 242/714] octeontx2-pf: Fix uninitialized boolean variable pps In the case where act->id is FLOW_ACTION_POLICE and also act->police.rate_bytes_ps > 0 or act->police.rate_pkt_ps is not > 0 the boolean variable pps contains an uninitialized value when function otx2_tc_act_set_police is called. Fix this by initializing pps to false. Addresses-Coverity: ("Uninitialized scalar variable)" Fixes: 68fbff68dbea ("octeontx2-pf: Add police action for TC flower") Signed-off-by: Colin Ian King Signed-off-by: David S. Miller --- drivers/net/ethernet/marvell/octeontx2/nic/otx2_tc.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/ethernet/marvell/octeontx2/nic/otx2_tc.c b/drivers/net/ethernet/marvell/octeontx2/nic/otx2_tc.c index 905fc02a7dfed7..972b202b9884d8 100644 --- a/drivers/net/ethernet/marvell/octeontx2/nic/otx2_tc.c +++ b/drivers/net/ethernet/marvell/octeontx2/nic/otx2_tc.c @@ -288,7 +288,7 @@ static int otx2_tc_parse_actions(struct otx2_nic *nic, struct otx2_nic *priv; u32 burst, mark = 0; u8 nr_police = 0; - bool pps; + bool pps = false; u64 rate; int i; From efdb6720b44b2f016d2a04b2c84e577e57ed84bd Mon Sep 17 00:00:00 2001 From: Hugh Dickins Date: Sun, 11 Jul 2021 20:10:49 -0700 Subject: [PATCH 243/714] mm/rmap: fix munlocking Anon THP with mlocked ptes Many thanks to Kirill for reminding that PageDoubleMap cannot be relied on to warn of pte mappings in the Anon THP case; and a scan of subpages does not seem appropriate here. Note how follow_trans_huge_pmd() does not even mark an Anon THP as mlocked when compound_mapcount != 1: multiple mlocking of Anon THP is avoided, so simply return from page_mlock() in this case. Link: https://lore.kernel.org/lkml/cfa154c-d595-406-eb7d-eb9df730f944@google.com/ Fixes: d9770fcc1c0c ("mm/rmap: fix old bug: munlocking THP missed other mlocks") Reported-by: Kirill A. Shutemov Signed-off-by: Hugh Dickins Acked-by: Kirill A. Shutemov Cc: Andrew Morton Cc: Alistair Popple Cc: Jason Gunthorpe Cc: Ralph Campbell Cc: Christoph Hellwig Cc: Yang Shi Cc: Shakeel Butt Signed-off-by: Linus Torvalds --- mm/rmap.c | 39 ++++++++++++++++++++++----------------- 1 file changed, 22 insertions(+), 17 deletions(-) diff --git a/mm/rmap.c b/mm/rmap.c index 795f9d5f838636..b9eb5c12f3fe10 100644 --- a/mm/rmap.c +++ b/mm/rmap.c @@ -1440,21 +1440,20 @@ static bool try_to_unmap_one(struct page *page, struct vm_area_struct *vma, /* * If the page is mlock()d, we cannot swap it out. */ - if (!(flags & TTU_IGNORE_MLOCK)) { - if (vma->vm_flags & VM_LOCKED) { - /* PTE-mapped THP are never marked as mlocked */ - if (!PageTransCompound(page) || - (PageHead(page) && !PageDoubleMap(page))) { - /* - * Holding pte lock, we do *not* need - * mmap_lock here - */ - mlock_vma_page(page); - } - ret = false; - page_vma_mapped_walk_done(&pvmw); - break; - } + if (!(flags & TTU_IGNORE_MLOCK) && + (vma->vm_flags & VM_LOCKED)) { + /* + * PTE-mapped THP are never marked as mlocked: so do + * not set it on a DoubleMap THP, nor on an Anon THP + * (which may still be PTE-mapped after DoubleMap was + * cleared). But stop unmapping even in those cases. + */ + if (!PageTransCompound(page) || (PageHead(page) && + !PageDoubleMap(page) && !PageAnon(page))) + mlock_vma_page(page); + page_vma_mapped_walk_done(&pvmw); + ret = false; + break; } /* Unexpected PMD-mapped THP? */ @@ -1986,8 +1985,10 @@ static bool page_mlock_one(struct page *page, struct vm_area_struct *vma, */ if (vma->vm_flags & VM_LOCKED) { /* - * PTE-mapped THP are never marked as mlocked, but - * this function is never called when PageDoubleMap(). + * PTE-mapped THP are never marked as mlocked; but + * this function is never called on a DoubleMap THP, + * nor on an Anon THP (which may still be PTE-mapped + * after DoubleMap was cleared). */ mlock_vma_page(page); /* @@ -2022,6 +2023,10 @@ void page_mlock(struct page *page) VM_BUG_ON_PAGE(!PageLocked(page) || PageLRU(page), page); VM_BUG_ON_PAGE(PageCompound(page) && PageDoubleMap(page), page); + /* Anon THP are only marked as mlocked when singly mapped */ + if (PageTransCompound(page) && PageAnon(page)) + return; + rmap_walk(page, &rwc); } From 79789db03fdd77510cfb35cb4b3bd52b6c50c901 Mon Sep 17 00:00:00 2001 From: "Matthew Wilcox (Oracle)" Date: Mon, 12 Jul 2021 16:32:07 +0100 Subject: [PATCH 244/714] mm: Make copy_huge_page() always available Rewrite copy_huge_page() and move it into mm/util.c so it's always available. Fixes an exposure of uninitialised memory on configurations with HUGETLB and UFFD enabled and MIGRATION disabled. Fixes: 8cc5fcbb5be8 ("mm, hugetlb: fix racy resv_huge_pages underflow on UFFDIO_COPY") Signed-off-by: Matthew Wilcox (Oracle) Reviewed-by: Mike Kravetz Signed-off-by: Linus Torvalds --- include/linux/migrate.h | 5 ----- include/linux/mm.h | 1 + mm/migrate.c | 48 ----------------------------------------- mm/util.c | 10 +++++++++ 4 files changed, 11 insertions(+), 53 deletions(-) diff --git a/include/linux/migrate.h b/include/linux/migrate.h index 9b7b7cd3bae9f5..23dadf7aeba896 100644 --- a/include/linux/migrate.h +++ b/include/linux/migrate.h @@ -51,7 +51,6 @@ extern int migrate_huge_page_move_mapping(struct address_space *mapping, struct page *newpage, struct page *page); extern int migrate_page_move_mapping(struct address_space *mapping, struct page *newpage, struct page *page, int extra_count); -extern void copy_huge_page(struct page *dst, struct page *src); #else static inline void putback_movable_pages(struct list_head *l) {} @@ -77,10 +76,6 @@ static inline int migrate_huge_page_move_mapping(struct address_space *mapping, { return -ENOSYS; } - -static inline void copy_huge_page(struct page *dst, struct page *src) -{ -} #endif /* CONFIG_MIGRATION */ #ifdef CONFIG_COMPACTION diff --git a/include/linux/mm.h b/include/linux/mm.h index 57453dba41b949..7ca22e6e694a84 100644 --- a/include/linux/mm.h +++ b/include/linux/mm.h @@ -906,6 +906,7 @@ void __put_page(struct page *page); void put_pages_list(struct list_head *pages); void split_page(struct page *page, unsigned int order); +void copy_huge_page(struct page *dst, struct page *src); /* * Compound pages have a destructor function. Provide a diff --git a/mm/migrate.c b/mm/migrate.c index 23cbd9de030b84..34a9ad3e0a4f93 100644 --- a/mm/migrate.c +++ b/mm/migrate.c @@ -536,54 +536,6 @@ int migrate_huge_page_move_mapping(struct address_space *mapping, return MIGRATEPAGE_SUCCESS; } -/* - * Gigantic pages are so large that we do not guarantee that page++ pointer - * arithmetic will work across the entire page. We need something more - * specialized. - */ -static void __copy_gigantic_page(struct page *dst, struct page *src, - int nr_pages) -{ - int i; - struct page *dst_base = dst; - struct page *src_base = src; - - for (i = 0; i < nr_pages; ) { - cond_resched(); - copy_highpage(dst, src); - - i++; - dst = mem_map_next(dst, dst_base, i); - src = mem_map_next(src, src_base, i); - } -} - -void copy_huge_page(struct page *dst, struct page *src) -{ - int i; - int nr_pages; - - if (PageHuge(src)) { - /* hugetlbfs page */ - struct hstate *h = page_hstate(src); - nr_pages = pages_per_huge_page(h); - - if (unlikely(nr_pages > MAX_ORDER_NR_PAGES)) { - __copy_gigantic_page(dst, src, nr_pages); - return; - } - } else { - /* thp page */ - BUG_ON(!PageTransHuge(src)); - nr_pages = thp_nr_pages(src); - } - - for (i = 0; i < nr_pages; i++) { - cond_resched(); - copy_highpage(dst + i, src + i); - } -} - /* * Copy the page to its new location */ diff --git a/mm/util.c b/mm/util.c index 99c6cc77de9e2f..9043d03750a733 100644 --- a/mm/util.c +++ b/mm/util.c @@ -731,6 +731,16 @@ int __page_mapcount(struct page *page) } EXPORT_SYMBOL_GPL(__page_mapcount); +void copy_huge_page(struct page *dst, struct page *src) +{ + unsigned i, nr = compound_nr(src); + + for (i = 0; i < nr; i++) { + cond_resched(); + copy_highpage(nth_page(dst, i), nth_page(src, i)); + } +} + int sysctl_overcommit_memory __read_mostly = OVERCOMMIT_GUESS; int sysctl_overcommit_ratio __read_mostly = 50; unsigned long sysctl_overcommit_kbytes __read_mostly; From b0863f1927323110e3d0d69f6adb6a91018a9a3c Mon Sep 17 00:00:00 2001 From: Alan Stern Date: Mon, 12 Jul 2021 14:54:36 -0400 Subject: [PATCH 245/714] USB: core: Fix incorrect pipe calculation in do_proc_control() When the user submits a control URB via usbfs, the user supplies the bRequestType value and the kernel uses it to compute the pipe value. However, do_proc_control() performs this computation incorrectly in the case where the bRequestType direction bit is set to USB_DIR_IN and the URB's transfer length is 0: The pipe's direction is also set to IN but it should be OUT, which is the direction the actual transfer will use regardless of bRequestType. Commit 5cc59c418fde ("USB: core: WARN if pipe direction != setup packet direction") added a check to compare the direction bit in the pipe value to a control URB's actual direction and to WARN if they are different. This can be triggered by the incorrect computation mentioned above, as found by syzbot. This patch fixes the computation, thus avoiding the WARNing. Reported-and-tested-by: syzbot+72af3105289dcb4c055b@syzkaller.appspotmail.com Signed-off-by: Alan Stern Link: https://lore.kernel.org/r/20210712185436.GB326369@rowland.harvard.edu Signed-off-by: Greg Kroah-Hartman --- drivers/usb/core/devio.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/usb/core/devio.c b/drivers/usb/core/devio.c index b9746449876394..9618ba622a2d06 100644 --- a/drivers/usb/core/devio.c +++ b/drivers/usb/core/devio.c @@ -1133,7 +1133,7 @@ static int do_proc_control(struct usb_dev_state *ps, "wIndex=%04x wLength=%04x\n", ctrl->bRequestType, ctrl->bRequest, ctrl->wValue, ctrl->wIndex, ctrl->wLength); - if (ctrl->bRequestType & 0x80) { + if ((ctrl->bRequestType & USB_DIR_IN) && ctrl->wLength) { pipe = usb_rcvctrlpipe(dev, 0); snoop_urb(dev, NULL, pipe, ctrl->wLength, tmo, SUBMIT, NULL, 0); From 7fef2edf7cc753b51f7ccc74993971b0a9c81eca Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Mon, 12 Jul 2021 17:50:01 +0200 Subject: [PATCH 246/714] sd: don't mess with SD_MINORS for CONFIG_DEBUG_BLOCK_EXT_DEVT No need to give up the original sd minor even with this option, and if we did we'd also need to fix the number of minors for this configuration to actually work. Fixes: 7c3f828b522b0 ("block: refactor device number setup in __device_add_disk") Reported-by: Guenter Roeck Signed-off-by: Christoph Hellwig Tested-by: Guenter Roeck Signed-off-by: Linus Torvalds --- drivers/scsi/sd.c | 4 ---- 1 file changed, 4 deletions(-) diff --git a/drivers/scsi/sd.c b/drivers/scsi/sd.c index 6d2d63629a90d5..b8d55af763f92d 100644 --- a/drivers/scsi/sd.c +++ b/drivers/scsi/sd.c @@ -98,11 +98,7 @@ MODULE_ALIAS_SCSI_DEVICE(TYPE_MOD); MODULE_ALIAS_SCSI_DEVICE(TYPE_RBC); MODULE_ALIAS_SCSI_DEVICE(TYPE_ZBC); -#if !defined(CONFIG_DEBUG_BLOCK_EXT_DEVT) #define SD_MINORS 16 -#else -#define SD_MINORS 0 -#endif static void sd_config_discard(struct scsi_disk *, unsigned int); static void sd_config_write_same(struct scsi_disk *); From ed01ad3a2fac8fff63f441ead4f8653da053c65b Mon Sep 17 00:00:00 2001 From: Rae Moar Date: Thu, 1 Jul 2021 17:52:31 +0000 Subject: [PATCH 247/714] kunit: tool: Fix error messages for cases of no tests and wrong TAP header This patch addresses misleading error messages reported by kunit_tool in two cases. First, in the case of TAP output having an incorrect header format or missing a header, the parser used to output an error message of 'no tests run!'. Now the parser outputs an error message of 'could not parse test results!'. As an example: Before: $ ./tools/testing/kunit/kunit.py parse /dev/null [ERROR] no tests run! ... After: $ ./tools/testing/kunit/kunit.py parse /dev/null [ERROR] could not parse test results! ... Second, in the case of TAP output with the correct header but no tests, the parser used to output an error message of 'could not parse test results!'. Now the parser outputs an error message of 'no tests run!'. As an example: Before: $ echo -e 'TAP version 14\n1..0' | ./tools/testing/kunit/kunit.py parse [ERROR] could not parse test results! After: $ echo -e 'TAP version 14\n1..0' | ./tools/testing/kunit/kunit.py parse [ERROR] no tests run! Additionally, this patch also corrects the tests in kunit_tool_test.py and adds a test to check the error in the case of TAP output with the correct header but no tests. Signed-off-by: Rae Moar Reviewed-by: David Gow Reviewed-by: Daniel Latypov Reviewed-by: Brendan Higgins Signed-off-by: Shuah Khan --- tools/testing/kunit/kunit_parser.py | 6 ++++-- tools/testing/kunit/kunit_tool_test.py | 16 +++++++++++++--- ...st_is_test_passed-no_tests_run_no_header.log} | 0 ...t_is_test_passed-no_tests_run_with_header.log | 2 ++ 4 files changed, 19 insertions(+), 5 deletions(-) rename tools/testing/kunit/test_data/{test_is_test_passed-no_tests_run.log => test_is_test_passed-no_tests_run_no_header.log} (100%) create mode 100644 tools/testing/kunit/test_data/test_is_test_passed-no_tests_run_with_header.log diff --git a/tools/testing/kunit/kunit_parser.py b/tools/testing/kunit/kunit_parser.py index c3c524b79db8a5..b88db3f51dc5d8 100644 --- a/tools/testing/kunit/kunit_parser.py +++ b/tools/testing/kunit/kunit_parser.py @@ -338,9 +338,11 @@ def bubble_up_suite_errors(test_suites: Iterable[TestSuite]) -> TestStatus: def parse_test_result(lines: LineStream) -> TestResult: consume_non_diagnostic(lines) if not lines or not parse_tap_header(lines): - return TestResult(TestStatus.NO_TESTS, [], lines) + return TestResult(TestStatus.FAILURE_TO_PARSE_TESTS, [], lines) expected_test_suite_num = parse_test_plan(lines) - if not expected_test_suite_num: + if expected_test_suite_num == 0: + return TestResult(TestStatus.NO_TESTS, [], lines) + elif expected_test_suite_num is None: return TestResult(TestStatus.FAILURE_TO_PARSE_TESTS, [], lines) test_suites = [] for i in range(1, expected_test_suite_num + 1): diff --git a/tools/testing/kunit/kunit_tool_test.py b/tools/testing/kunit/kunit_tool_test.py index bdae0e5f619701..75045aa0f8a16b 100755 --- a/tools/testing/kunit/kunit_tool_test.py +++ b/tools/testing/kunit/kunit_tool_test.py @@ -157,8 +157,18 @@ def test_parse_failed_test_log(self): kunit_parser.TestStatus.FAILURE, result.status) + def test_no_header(self): + empty_log = test_data_path('test_is_test_passed-no_tests_run_no_header.log') + with open(empty_log) as file: + result = kunit_parser.parse_run_tests( + kunit_parser.extract_tap_lines(file.readlines())) + self.assertEqual(0, len(result.suites)) + self.assertEqual( + kunit_parser.TestStatus.FAILURE_TO_PARSE_TESTS, + result.status) + def test_no_tests(self): - empty_log = test_data_path('test_is_test_passed-no_tests_run.log') + empty_log = test_data_path('test_is_test_passed-no_tests_run_with_header.log') with open(empty_log) as file: result = kunit_parser.parse_run_tests( kunit_parser.extract_tap_lines(file.readlines())) @@ -173,7 +183,7 @@ def test_no_kunit_output(self): with open(crash_log) as file: result = kunit_parser.parse_run_tests( kunit_parser.extract_tap_lines(file.readlines())) - print_mock.assert_any_call(StrContains('no tests run!')) + print_mock.assert_any_call(StrContains('could not parse test results!')) print_mock.stop() file.close() @@ -309,7 +319,7 @@ def test_crashed_test_json(self): result["sub_groups"][1]["test_cases"][0]) def test_no_tests_json(self): - result = self._json_for('test_is_test_passed-no_tests_run.log') + result = self._json_for('test_is_test_passed-no_tests_run_with_header.log') self.assertEqual(0, len(result['sub_groups'])) class StrContains(str): diff --git a/tools/testing/kunit/test_data/test_is_test_passed-no_tests_run.log b/tools/testing/kunit/test_data/test_is_test_passed-no_tests_run_no_header.log similarity index 100% rename from tools/testing/kunit/test_data/test_is_test_passed-no_tests_run.log rename to tools/testing/kunit/test_data/test_is_test_passed-no_tests_run_no_header.log diff --git a/tools/testing/kunit/test_data/test_is_test_passed-no_tests_run_with_header.log b/tools/testing/kunit/test_data/test_is_test_passed-no_tests_run_with_header.log new file mode 100644 index 00000000000000..5f48ee659d40e0 --- /dev/null +++ b/tools/testing/kunit/test_data/test_is_test_passed-no_tests_run_with_header.log @@ -0,0 +1,2 @@ +TAP version 14 +1..0 From a72fa6304342a3ad749955fd9007695f761e12d6 Mon Sep 17 00:00:00 2001 From: Daniel Latypov Date: Wed, 23 Jun 2021 13:17:21 -0700 Subject: [PATCH 248/714] Documentation: kunit: drop obsolete note about uml_abort for coverage Commit b6d5799b0b58 ("kunit: Add 'kunit_shutdown' option") changes KUnit to call kernel_halt() by default when done testing. This fixes the issue with not having .gcda files due to not calling atexit() handlers, and therefore we can stop recommending people manually tweak UML code. The need to use older versions of GCC (<=6) remains however, due to linktime issues, same as before. Note: There also might still be issues with .gcda files as well in newer versions. Signed-off-by: Daniel Latypov Reviewed-by: David Gow Reviewed-by: Brendan Higgins Signed-off-by: Shuah Khan --- Documentation/dev-tools/kunit/running_tips.rst | 14 +------------- 1 file changed, 1 insertion(+), 13 deletions(-) diff --git a/Documentation/dev-tools/kunit/running_tips.rst b/Documentation/dev-tools/kunit/running_tips.rst index 7d99386cf94a94..d1626d548fa528 100644 --- a/Documentation/dev-tools/kunit/running_tips.rst +++ b/Documentation/dev-tools/kunit/running_tips.rst @@ -86,19 +86,7 @@ Generating code coverage reports under UML .. note:: TODO(brendanhiggins@google.com): There are various issues with UML and versions of gcc 7 and up. You're likely to run into missing ``.gcda`` - files or compile errors. We know one `faulty GCC commit - `_ - but not how we'd go about getting this fixed. The compile errors still - need some investigation. - -.. note:: - TODO(brendanhiggins@google.com): for recent versions of Linux - (5.10-5.12, maybe earlier), there's a bug with gcov counters not being - flushed in UML. This translates to very low (<1%) reported coverage. This is - related to the above issue and can be worked around by replacing the - one call to ``uml_abort()`` (it's in ``os_dump_core()``) with a plain - ``exit()``. - + files or compile errors. This is different from the "normal" way of getting coverage information that is documented in Documentation/dev-tools/gcov.rst. From 58c965d84e14196e762c803c50c7b207a9e352bc Mon Sep 17 00:00:00 2001 From: Daniel Latypov Date: Wed, 23 Jun 2021 12:09:19 -0700 Subject: [PATCH 249/714] kunit: tool: remove unnecessary "annotations" import The import was working around the fact "tuple[T]" was used instead of typing.Tuple[T]. Convert it to use type.Tuple to be consistent with how the rest of the code is anotated. Signed-off-by: Daniel Latypov Reviewed-by: David Gow Reviewed-by: Brendan Higgins Tested-by: Brendan Higgins Signed-off-by: Shuah Khan --- tools/testing/kunit/kunit_kernel.py | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/tools/testing/kunit/kunit_kernel.py b/tools/testing/kunit/kunit_kernel.py index 90bc007f1f9382..2c6f916ccbafa7 100644 --- a/tools/testing/kunit/kunit_kernel.py +++ b/tools/testing/kunit/kunit_kernel.py @@ -6,15 +6,13 @@ # Author: Felix Guo # Author: Brendan Higgins -from __future__ import annotations import importlib.util import logging import subprocess import os import shutil import signal -from typing import Iterator -from typing import Optional +from typing import Iterator, Optional, Tuple from contextlib import ExitStack @@ -208,7 +206,7 @@ def get_source_tree_ops(arch: str, cross_compile: Optional[str]) -> LinuxSourceT raise ConfigError(arch + ' is not a valid arch') def get_source_tree_ops_from_qemu_config(config_path: str, - cross_compile: Optional[str]) -> tuple[ + cross_compile: Optional[str]) -> Tuple[ str, LinuxSourceTreeOperations]: # The module name/path has very little to do with where the actual file # exists (I learned this through experimentation and could not find it From df4b0807ca1a62822342d404b863eff933d15762 Mon Sep 17 00:00:00 2001 From: SeongJae Park Date: Mon, 12 Jul 2021 19:52:58 +0000 Subject: [PATCH 250/714] kunit: tool: Assert the version requirement Commit 87c9c1631788 ("kunit: tool: add support for QEMU") on the 'next' tree adds 'from __future__ import annotations' in 'kunit_kernel.py'. Because it is supported on only >=3.7 Python, people using older Python will get below error: Traceback (most recent call last): File "./tools/testing/kunit/kunit.py", line 20, in import kunit_kernel File "/home/sjpark/linux/tools/testing/kunit/kunit_kernel.py", line 9 from __future__ import annotations ^ SyntaxError: future feature annotations is not defined This commit adds a version assertion in 'kunit.py', so that people get more explicit error message like below: Traceback (most recent call last): File "./tools/testing/kunit/kunit.py", line 15, in assert sys.version_info >= (3, 7), "Python version is too old" AssertionError: Python version is too old Signed-off-by: SeongJae Park Acked-by: Daniel Latypov Reviewed-by: Brendan Higgins Signed-off-by: Shuah Khan --- tools/testing/kunit/kunit.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/tools/testing/kunit/kunit.py b/tools/testing/kunit/kunit.py index be8d8d4a4e08fa..6276ce0c0196d8 100755 --- a/tools/testing/kunit/kunit.py +++ b/tools/testing/kunit/kunit.py @@ -12,6 +12,8 @@ import os import time +assert sys.version_info >= (3, 7), "Python version is too old" + from collections import namedtuple from enum import Enum, auto From 0c0f6299ba71faf610e311605e09e96331c45f28 Mon Sep 17 00:00:00 2001 From: Paolo Pisati Date: Wed, 30 Jun 2021 16:57:40 +0200 Subject: [PATCH 251/714] selftests: memory-hotplug: avoid spamming logs with dump_page(), ratio limit hot-remove error test While the offline memory test obey ratio limit, the same test with error injection does not and tries to offline all the hotpluggable memory, spamming system logs with hundreds of thousands of dump_page() entries, slowing system down (to the point the test itself timesout and gets terminated) and excessive fs occupation: ... [ 9784.393354] page:c00c0000007d1b40 refcount:3 mapcount:0 mapping:c0000001fc03e950 index:0xe7b [ 9784.393355] def_blk_aops [ 9784.393356] flags: 0x3ffff800002062(referenced|active|workingset|private) [ 9784.393358] raw: 003ffff800002062 c0000001b9343a68 c0000001b9343a68 c0000001fc03e950 [ 9784.393359] raw: 0000000000000e7b c000000006607b18 00000003ffffffff c00000000490d000 [ 9784.393359] page dumped because: migration failure [ 9784.393360] page->mem_cgroup:c00000000490d000 [ 9784.393416] migrating pfn 1f46d failed ret:1 ... $ grep "page dumped because: migration failure" /var/log/kern.log | wc -l 2405558 $ ls -la /var/log/kern.log -rw-r----- 1 syslog adm 2256109539 Jun 30 14:19 /var/log/kern.log Signed-off-by: Paolo Pisati Acked-by: Krzysztof Kozlowski Signed-off-by: Shuah Khan --- tools/testing/selftests/memory-hotplug/mem-on-off-test.sh | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/tools/testing/selftests/memory-hotplug/mem-on-off-test.sh b/tools/testing/selftests/memory-hotplug/mem-on-off-test.sh index b37585e6aa3868..46a97f318f58e4 100755 --- a/tools/testing/selftests/memory-hotplug/mem-on-off-test.sh +++ b/tools/testing/selftests/memory-hotplug/mem-on-off-test.sh @@ -282,7 +282,9 @@ done # echo $error > $NOTIFIER_ERR_INJECT_DIR/actions/MEM_GOING_OFFLINE/error for memory in `hotpluggable_online_memory`; do - offline_memory_expect_fail $memory + if [ $((RANDOM % 100)) -lt $ratio ]; then + offline_memory_expect_fail $memory + fi done echo 0 > $NOTIFIER_ERR_INJECT_DIR/actions/MEM_GOING_OFFLINE/error From 422969bbb5af2f7226cf75cdb4aae02f46299dc2 Mon Sep 17 00:00:00 2001 From: Bart Van Assche Date: Fri, 9 Jul 2021 13:26:20 -0700 Subject: [PATCH 252/714] scsi: core: Fix the documentation of the scsi_execute() time parameter The unit of the scsi_execute() timeout parameter is 1/HZ seconds instead of one second, just like the timeouts used in the block layer. Fix the documentation header above the definition of the scsi_execute() macro. Link: https://lore.kernel.org/r/20210709202638.9480-3-bvanassche@acm.org Fixes: "[SCSI] use scatter lists for all block pc requests and simplify hw handlers" # v2.6.16.28 Cc: "James E.J. Bottomley" Cc: Hannes Reinecke Cc: Ming Lei Cc: John Garry Reviewed-by: Avri Altman Reviewed-by: Hannes Reinecke Signed-off-by: Bart Van Assche Signed-off-by: Martin K. Petersen --- drivers/scsi/scsi_lib.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/scsi/scsi_lib.c b/drivers/scsi/scsi_lib.c index 8f9727e525aa29..7456a26aef513b 100644 --- a/drivers/scsi/scsi_lib.c +++ b/drivers/scsi/scsi_lib.c @@ -194,7 +194,7 @@ void scsi_queue_insert(struct scsi_cmnd *cmd, int reason) * @bufflen: len of buffer * @sense: optional sense buffer * @sshdr: optional decoded sense header - * @timeout: request timeout in seconds + * @timeout: request timeout in HZ * @retries: number of times to retry request * @flags: flags for ->cmd_flags * @rq_flags: flags for ->rq_flags From fbf1a58701ecbf61211476277a77eaada6c091d4 Mon Sep 17 00:00:00 2001 From: Bart Van Assche Date: Sat, 10 Jul 2021 20:36:23 -0700 Subject: [PATCH 253/714] scsi: fas216: Fix a build error Use SAM_STAT_GOOD instead of GOOD since GOOD has been removed. Link: https://lore.kernel.org/r/20210711033623.11267-1-bvanassche@acm.org Fixes: 3d45cefc8edd ("scsi: core: Drop obsolete Linux-specific SCSI status codes") Fixes: df1303147649 ("scsi: fas216: Use get_status_byte() to avoid using Linux-specific status codes") Cc: Hannes Reinecke Reviewed-by: Hannes Reinecke Signed-off-by: Bart Van Assche Signed-off-by: Martin K. Petersen --- drivers/scsi/arm/fas216.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/scsi/arm/fas216.c b/drivers/scsi/arm/fas216.c index 30ed3d23635ad6..6baa9b36367d62 100644 --- a/drivers/scsi/arm/fas216.c +++ b/drivers/scsi/arm/fas216.c @@ -2010,7 +2010,7 @@ static void fas216_rq_sns_done(FAS216_Info *info, struct scsi_cmnd *SCpnt, "request sense complete, result=0x%04x%02x%02x", result, SCpnt->SCp.Message, SCpnt->SCp.Status); - if (result != DID_OK || SCpnt->SCp.Status != GOOD) + if (result != DID_OK || SCpnt->SCp.Status != SAM_STAT_GOOD) /* * Something went wrong. Make sure that we don't * have valid data in the sense buffer that could From 93aa71ad7379900e61c8adff6a710a4c18c7c99b Mon Sep 17 00:00:00 2001 From: Tyrel Datwyler Date: Thu, 1 Jul 2021 13:56:59 -0600 Subject: [PATCH 254/714] scsi: core: Fix bad pointer dereference when ehandler kthread is invalid Commit 66a834d09293 ("scsi: core: Fix error handling of scsi_host_alloc()") changed the allocation logic to call put_device() to perform host cleanup with the assumption that IDA removal and stopping the kthread would properly be performed in scsi_host_dev_release(). However, in the unlikely case that the error handler thread fails to spawn, shost->ehandler is set to ERR_PTR(-ENOMEM). The error handler cleanup code in scsi_host_dev_release() will call kthread_stop() if shost->ehandler != NULL which will always be the case whether the kthread was successfully spawned or not. In the case that it failed to spawn this has the nasty side effect of trying to dereference an invalid pointer when kthread_stop() is called. The following splat provides an example of this behavior in the wild: scsi host11: error handler thread failed to spawn, error = -4 Kernel attempted to read user page (10c) - exploit attempt? (uid: 0) BUG: Kernel NULL pointer dereference on read at 0x0000010c Faulting instruction address: 0xc00000000818e9a8 Oops: Kernel access of bad area, sig: 11 [#1] LE PAGE_SIZE=64K MMU=Hash SMP NR_CPUS=2048 NUMA pSeries Modules linked in: ibmvscsi(+) scsi_transport_srp dm_multipath dm_mirror dm_region hash dm_log dm_mod fuse overlay squashfs loop CPU: 12 PID: 274 Comm: systemd-udevd Not tainted 5.13.0-rc7 #1 NIP: c00000000818e9a8 LR: c0000000089846e8 CTR: 0000000000007ee8 REGS: c000000037d12ea0 TRAP: 0300 Not tainted (5.13.0-rc7) MSR: 800000000280b033 <SF,VEC,VSX,EE,FP,ME,IR,DR,RI,LE> CR: 28228228 XER: 20040001 CFAR: c0000000089846e4 DAR: 000000000000010c DSISR: 40000000 IRQMASK: 0 GPR00: c0000000089846e8 c000000037d13140 c000000009cc1100 fffffffffffffffc GPR04: 0000000000000001 0000000000000000 0000000000000000 c000000037dc0000 GPR08: 0000000000000000 c000000037dc0000 0000000000000001 00000000fffff7ff GPR12: 0000000000008000 c00000000a049000 c000000037d13d00 000000011134d5a0 GPR16: 0000000000001740 c0080000190d0000 c0080000190d1740 c000000009129288 GPR20: c000000037d13bc0 0000000000000001 c000000037d13bc0 c0080000190b7898 GPR24: c0080000190b7708 0000000000000000 c000000033bb2c48 0000000000000000 GPR28: c000000046b28280 0000000000000000 000000000000010c fffffffffffffffc NIP [c00000000818e9a8] kthread_stop+0x38/0x230 LR [c0000000089846e8] scsi_host_dev_release+0x98/0x160 Call Trace: [c000000033bb2c48] 0xc000000033bb2c48 (unreliable) [c0000000089846e8] scsi_host_dev_release+0x98/0x160 [c00000000891e960] device_release+0x60/0x100 [c0000000087e55c4] kobject_release+0x84/0x210 [c00000000891ec78] put_device+0x28/0x40 [c000000008984ea4] scsi_host_alloc+0x314/0x430 [c0080000190b38bc] ibmvscsi_probe+0x54/0xad0 [ibmvscsi] [c000000008110104] vio_bus_probe+0xa4/0x4b0 [c00000000892a860] really_probe+0x140/0x680 [c00000000892aefc] driver_probe_device+0x15c/0x200 [c00000000892b63c] device_driver_attach+0xcc/0xe0 [c00000000892b740] __driver_attach+0xf0/0x200 [c000000008926f28] bus_for_each_dev+0xa8/0x130 [c000000008929ce4] driver_attach+0x34/0x50 [c000000008928fc0] bus_add_driver+0x1b0/0x300 [c00000000892c798] driver_register+0x98/0x1a0 [c00000000810eb60] __vio_register_driver+0x80/0xe0 [c0080000190b4a30] ibmvscsi_module_init+0x9c/0xdc [ibmvscsi] [c0000000080121d0] do_one_initcall+0x60/0x2d0 [c000000008261abc] do_init_module+0x7c/0x320 [c000000008265700] load_module+0x2350/0x25b0 [c000000008265cb4] __do_sys_finit_module+0xd4/0x160 [c000000008031110] system_call_exception+0x150/0x2d0 [c00000000800d35c] system_call_common+0xec/0x278 Fix this be nulling shost->ehandler when the kthread fails to spawn. Link: https://lore.kernel.org/r/20210701195659.3185475-1-tyreld@linux.ibm.com Fixes: 66a834d09293 ("scsi: core: Fix error handling of scsi_host_alloc()") Cc: stable@vger.kernel.org Reviewed-by: Ming Lei Signed-off-by: Tyrel Datwyler Signed-off-by: Martin K. Petersen --- drivers/scsi/hosts.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/scsi/hosts.c b/drivers/scsi/hosts.c index 929a3b043ad7a0..3f6f14f0cafb3e 100644 --- a/drivers/scsi/hosts.c +++ b/drivers/scsi/hosts.c @@ -488,6 +488,7 @@ struct Scsi_Host *scsi_host_alloc(struct scsi_host_template *sht, int privsize) shost_printk(KERN_WARNING, shost, "error handler thread failed to spawn, error = %ld\n", PTR_ERR(shost->ehandler)); + shost->ehandler = NULL; goto fail; } From 8b3bdd99c092bbaeaa7d9eecb1a3e5dc9112002b Mon Sep 17 00:00:00 2001 From: Steffen Maier Date: Fri, 2 Jul 2021 18:09:22 +0200 Subject: [PATCH 255/714] scsi: zfcp: Report port fc_security as unknown early during remote cable pull On remote cable pull, a zfcp_port keeps its status and only gets ZFCP_STATUS_PORT_LINK_TEST added. Only after an ADISC timeout, we would actually start port recovery and remove ZFCP_STATUS_COMMON_UNBLOCKED which zfcp_sysfs_port_fc_security_show() detected and reported as "unknown" instead of the old and possibly stale zfcp_port->connection_info. Add check for ZFCP_STATUS_PORT_LINK_TEST for timely "unknown" report. Link: https://lore.kernel.org/r/20210702160922.2667874-1-maier@linux.ibm.com Fixes: a17c78460093 ("scsi: zfcp: report FC Endpoint Security in sysfs") Cc: #5.7+ Reviewed-by: Benjamin Block Signed-off-by: Steffen Maier Signed-off-by: Martin K. Petersen --- drivers/s390/scsi/zfcp_sysfs.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/s390/scsi/zfcp_sysfs.c b/drivers/s390/scsi/zfcp_sysfs.c index 544efd4c42f022..b8cd75a872eeb8 100644 --- a/drivers/s390/scsi/zfcp_sysfs.c +++ b/drivers/s390/scsi/zfcp_sysfs.c @@ -487,6 +487,7 @@ static ssize_t zfcp_sysfs_port_fc_security_show(struct device *dev, if (0 == (status & ZFCP_STATUS_COMMON_OPEN) || 0 == (status & ZFCP_STATUS_COMMON_UNBLOCKED) || 0 == (status & ZFCP_STATUS_PORT_PHYS_OPEN) || + 0 != (status & ZFCP_STATUS_PORT_LINK_TEST) || 0 != (status & ZFCP_STATUS_COMMON_ERP_FAILED) || 0 != (status & ZFCP_STATUS_COMMON_ACCESS_BOXED)) i = sprintf(buf, "unknown\n"); From bb6beabf2f79ea063750bf16e78b61e5a50f2d5c Mon Sep 17 00:00:00 2001 From: Randy Dunlap Date: Thu, 8 Jul 2021 09:57:23 -0700 Subject: [PATCH 256/714] scsi: pm8001: Clean up kernel-doc and comments Fix kernel-doc warnings then test again, wash, rinse, find more, then repeat more/again. Also fix spellos, some grammar, and some punctuation. ../drivers/scsi/pm8001/pm8001_ctl.c:557: warning: This comment starts with '/**', but isn't a kernel-doc comment. Refer Documentation/doc-guide/kernel-doc.rst ** pm8001_ctl_fatal_log_show - fatal error logging ../drivers/scsi/pm8001/pm8001_ctl.c:577: warning: This comment starts with '/**', but isn't a kernel-doc comment. Refer Documentation/doc-guide/kernel-doc.rst ** non_fatal_log_show - non fatal error logging ../drivers/scsi/pm8001/pm8001_ctl.c:622: warning: This comment starts with '/**', but isn't a kernel-doc comment. Refer Documentation/doc-guide/kernel-doc.rst ** pm8001_ctl_gsm_log_show - gsm dump collection Link: https://lore.kernel.org/r/20210708165723.8594-1-rdunlap@infradead.org Cc: Jack Wang Cc: linux-scsi@vger.kernel.org Cc: "James E.J. Bottomley" Cc: "Martin K. Petersen" Acked-by: Jack Wang Signed-off-by: Randy Dunlap Signed-off-by: Martin K. Petersen --- drivers/scsi/pm8001/pm8001_ctl.c | 48 +++++++++++++++-------------- drivers/scsi/pm8001/pm8001_hwi.c | 18 +++++------ drivers/scsi/pm8001/pm8001_init.c | 29 +++++++++--------- drivers/scsi/pm8001/pm8001_sas.c | 41 +++++++++++++------------ drivers/scsi/pm8001/pm80xx_hwi.c | 50 +++++++++++++++---------------- 5 files changed, 97 insertions(+), 89 deletions(-) diff --git a/drivers/scsi/pm8001/pm8001_ctl.c b/drivers/scsi/pm8001/pm8001_ctl.c index 0b8802beb7ce88..ec05c42e8ee6c3 100644 --- a/drivers/scsi/pm8001/pm8001_ctl.c +++ b/drivers/scsi/pm8001/pm8001_ctl.c @@ -77,7 +77,7 @@ DEVICE_ATTR(interface_rev, S_IRUGO, pm8001_ctl_mpi_interface_rev_show, NULL); * @attr: device attribute (unused) * @buf: the buffer returned * - * A sysfs 'read only' shost attribute. + * A sysfs 'read-only' shost attribute. */ static ssize_t controller_fatal_error_show(struct device *cdev, struct device_attribute *attr, char *buf) @@ -149,7 +149,7 @@ static ssize_t pm8001_ctl_ila_version_show(struct device *cdev, static DEVICE_ATTR(ila_version, 0444, pm8001_ctl_ila_version_show, NULL); /** - * pm8001_ctl_inactive_fw_version_show - Inacative firmware version number + * pm8001_ctl_inactive_fw_version_show - Inactive firmware version number * @cdev: pointer to embedded class device * @attr: device attribute (unused) * @buf: the buffer returned @@ -396,6 +396,7 @@ static DEVICE_ATTR(aap_log, S_IRUGO, pm8001_ctl_aap_log_show, NULL); * @cdev:pointer to embedded class device * @attr: device attribute (unused) * @buf: the buffer returned + * * A sysfs 'read-only' shost attribute. */ static ssize_t pm8001_ctl_ib_queue_log_show(struct device *cdev, @@ -430,6 +431,7 @@ static DEVICE_ATTR(ib_log, S_IRUGO, pm8001_ctl_ib_queue_log_show, NULL); * @cdev:pointer to embedded class device * @attr: device attribute (unused) * @buf: the buffer returned + * * A sysfs 'read-only' shost attribute. */ @@ -464,6 +466,7 @@ static DEVICE_ATTR(ob_log, S_IRUGO, pm8001_ctl_ob_queue_log_show, NULL); * @cdev:pointer to embedded class device * @attr: device attribute (unused) * @buf:the buffer returned + * * A sysfs 'read-only' shost attribute. */ static ssize_t pm8001_ctl_bios_version_show(struct device *cdev, @@ -555,13 +558,13 @@ static ssize_t pm8001_ctl_iop_log_show(struct device *cdev, static DEVICE_ATTR(iop_log, S_IRUGO, pm8001_ctl_iop_log_show, NULL); /** - ** pm8001_ctl_fatal_log_show - fatal error logging - ** @cdev:pointer to embedded class device - ** @attr: device attribute - ** @buf: the buffer returned - ** - ** A sysfs 'read-only' shost attribute. - **/ + * pm8001_ctl_fatal_log_show - fatal error logging + * @cdev:pointer to embedded class device + * @attr: device attribute + * @buf: the buffer returned + * + * A sysfs 'read-only' shost attribute. + */ static ssize_t pm8001_ctl_fatal_log_show(struct device *cdev, struct device_attribute *attr, char *buf) @@ -575,13 +578,13 @@ static ssize_t pm8001_ctl_fatal_log_show(struct device *cdev, static DEVICE_ATTR(fatal_log, S_IRUGO, pm8001_ctl_fatal_log_show, NULL); /** - ** non_fatal_log_show - non fatal error logging - ** @cdev:pointer to embedded class device - ** @attr: device attribute - ** @buf: the buffer returned - ** - ** A sysfs 'read-only' shost attribute. - **/ + * non_fatal_log_show - non fatal error logging + * @cdev:pointer to embedded class device + * @attr: device attribute + * @buf: the buffer returned + * + * A sysfs 'read-only' shost attribute. + */ static ssize_t non_fatal_log_show(struct device *cdev, struct device_attribute *attr, char *buf) { @@ -620,12 +623,13 @@ static ssize_t non_fatal_count_store(struct device *cdev, static DEVICE_ATTR_RW(non_fatal_count); /** - ** pm8001_ctl_gsm_log_show - gsm dump collection - ** @cdev:pointer to embedded class device - ** @attr: device attribute (unused) - ** @buf: the buffer returned - ** A sysfs 'read-only' shost attribute. - **/ + * pm8001_ctl_gsm_log_show - gsm dump collection + * @cdev:pointer to embedded class device + * @attr: device attribute (unused) + * @buf: the buffer returned + * + * A sysfs 'read-only' shost attribute. + */ static ssize_t pm8001_ctl_gsm_log_show(struct device *cdev, struct device_attribute *attr, char *buf) { diff --git a/drivers/scsi/pm8001/pm8001_hwi.c b/drivers/scsi/pm8001/pm8001_hwi.c index 33f8217577b14c..17c0f26e683a9c 100644 --- a/drivers/scsi/pm8001/pm8001_hwi.c +++ b/drivers/scsi/pm8001/pm8001_hwi.c @@ -384,7 +384,7 @@ static void update_outbnd_queue_table(struct pm8001_hba_info *pm8001_ha, /** * pm8001_bar4_shift - function is called to shift BAR base address - * @pm8001_ha : our hba card infomation + * @pm8001_ha : our hba card information * @shiftValue : shifting value in memory bar. */ int pm8001_bar4_shift(struct pm8001_hba_info *pm8001_ha, u32 shiftValue) @@ -1151,7 +1151,7 @@ static void pm8001_hw_chip_rst(struct pm8001_hba_info *pm8001_ha) } /** - * pm8001_chip_iounmap - which maped when initialized. + * pm8001_chip_iounmap - which mapped when initialized. * @pm8001_ha: our hba card information */ void pm8001_chip_iounmap(struct pm8001_hba_info *pm8001_ha) @@ -1187,10 +1187,10 @@ pm8001_chip_intx_interrupt_enable(struct pm8001_hba_info *pm8001_ha) pm8001_cw32(pm8001_ha, 0, MSGU_ODCR, ODCR_CLEAR_ALL); } - /** - * pm8001_chip_intx_interrupt_disable- disable PM8001 chip interrupt - * @pm8001_ha: our hba card information - */ +/** + * pm8001_chip_intx_interrupt_disable - disable PM8001 chip interrupt + * @pm8001_ha: our hba card information + */ static void pm8001_chip_intx_interrupt_disable(struct pm8001_hba_info *pm8001_ha) { @@ -1876,8 +1876,8 @@ static void pm8001_send_read_log(struct pm8001_hba_info *pm8001_ha, * @piomb: the message contents of this outbound message. * * When FW has completed a ssp request for example a IO request, after it has - * filled the SG data with the data, it will trigger this event represent - * that he has finished the job,please check the coresponding buffer. + * filled the SG data with the data, it will trigger this event representing + * that he has finished the job; please check the corresponding buffer. * So we will tell the caller who maybe waiting the result to tell upper layer * that the task has been finished. */ @@ -3522,7 +3522,7 @@ hw_event_phy_down(struct pm8001_hba_info *pm8001_ha, void *piomb) * * when sas layer find a device it will notify LLDD, then the driver register * the domain device to FW, this event is the return device ID which the FW - * has assigned, from now,inter-communication with FW is no longer using the + * has assigned, from now, inter-communication with FW is no longer using the * SAS address, use device ID which FW assigned. */ int pm8001_mpi_reg_resp(struct pm8001_hba_info *pm8001_ha, void *piomb) diff --git a/drivers/scsi/pm8001/pm8001_init.c b/drivers/scsi/pm8001/pm8001_init.c index 313248c7bab99d..47db7e0beae6f6 100644 --- a/drivers/scsi/pm8001/pm8001_init.c +++ b/drivers/scsi/pm8001/pm8001_init.c @@ -233,7 +233,7 @@ static irqreturn_t pm8001_interrupt_handler_msix(int irq, void *opaque) /** * pm8001_interrupt_handler_intx - main INTx interrupt handler. * @irq: interrupt number - * @dev_id: sas_ha structure. The HBA is retrieved from sas_has structure. + * @dev_id: sas_ha structure. The HBA is retrieved from sas_ha structure. */ static irqreturn_t pm8001_interrupt_handler_intx(int irq, void *dev_id) @@ -439,9 +439,9 @@ static int pm8001_alloc(struct pm8001_hba_info *pm8001_ha, } /** - * pm8001_ioremap - remap the pci high physical address to kernal virtual + * pm8001_ioremap - remap the pci high physical address to kernel virtual * address so that we can access them. - * @pm8001_ha:our hba structure. + * @pm8001_ha: our hba structure. */ static int pm8001_ioremap(struct pm8001_hba_info *pm8001_ha) { @@ -652,7 +652,7 @@ static void pm8001_post_sas_ha_init(struct Scsi_Host *shost, * pm8001_init_sas_add - initialize sas address * @pm8001_ha: our ha struct. * - * Currently we just set the fixed SAS address to our HBA,for manufacture, + * Currently we just set the fixed SAS address to our HBA, for manufacture, * it should read from the EEPROM */ static void pm8001_init_sas_add(struct pm8001_hba_info *pm8001_ha) @@ -790,7 +790,7 @@ struct pm8001_mpi3_phy_pg_trx_config { }; /** - * pm8001_get_internal_phy_settings : Retrieves the internal PHY settings + * pm8001_get_internal_phy_settings - Retrieves the internal PHY settings * @pm8001_ha : our adapter * @phycfg : PHY config page to populate */ @@ -810,7 +810,7 @@ void pm8001_get_internal_phy_settings(struct pm8001_hba_info *pm8001_ha, } /** - * pm8001_get_external_phy_settings : Retrieves the external PHY settings + * pm8001_get_external_phy_settings - Retrieves the external PHY settings * @pm8001_ha : our adapter * @phycfg : PHY config page to populate */ @@ -830,7 +830,7 @@ void pm8001_get_external_phy_settings(struct pm8001_hba_info *pm8001_ha, } /** - * pm8001_get_phy_mask : Retrieves the mask that denotes if a PHY is int/ext + * pm8001_get_phy_mask - Retrieves the mask that denotes if a PHY is int/ext * @pm8001_ha : our adapter * @phymask : The PHY mask */ @@ -868,7 +868,7 @@ void pm8001_get_phy_mask(struct pm8001_hba_info *pm8001_ha, int *phymask) } /** - * pm8001_set_phy_settings_ven_117c_12G() : Configure ATTO 12Gb PHY settings + * pm8001_set_phy_settings_ven_117c_12G() - Configure ATTO 12Gb PHY settings * @pm8001_ha : our adapter */ static @@ -903,7 +903,7 @@ int pm8001_set_phy_settings_ven_117c_12G(struct pm8001_hba_info *pm8001_ha) } /** - * pm8001_configure_phy_settings : Configures PHY settings based on vendor ID. + * pm8001_configure_phy_settings - Configures PHY settings based on vendor ID. * @pm8001_ha : our hba. */ static int pm8001_configure_phy_settings(struct pm8001_hba_info *pm8001_ha) @@ -1053,8 +1053,8 @@ static u32 pm8001_request_irq(struct pm8001_hba_info *pm8001_ha) * @ent: pci device id * * This function is the main initialization function, when register a new - * pci driver it is invoked, all struct an hardware initilization should be done - * here, also, register interrupt + * pci driver it is invoked, all struct and hardware initialization should be + * done here, also, register interrupt. */ static int pm8001_pci_probe(struct pci_dev *pdev, const struct pci_device_id *ent) @@ -1172,10 +1172,11 @@ static int pm8001_pci_probe(struct pci_dev *pdev, return rc; } -/* +/** * pm8001_init_ccb_tag - allocate memory to CCB and tag. * @pm8001_ha: our hba card information. * @shost: scsi host which has been allocated outside. + * @pdev: pci device. */ static int pm8001_init_ccb_tag(struct pm8001_hba_info *pm8001_ha, struct Scsi_Host *shost, @@ -1270,7 +1271,7 @@ static void pm8001_pci_remove(struct pci_dev *pdev) * pm8001_pci_suspend - power management suspend main entry point * @dev: Device struct * - * Returns 0 success, anything else error. + * Return: 0 on success, anything else on error. */ static int __maybe_unused pm8001_pci_suspend(struct device *dev) { @@ -1315,7 +1316,7 @@ static int __maybe_unused pm8001_pci_suspend(struct device *dev) * pm8001_pci_resume - power management resume main entry point * @dev: Device struct * - * Returns 0 success, anything else error. + * Return: 0 on success, anything else on error. */ static int __maybe_unused pm8001_pci_resume(struct device *dev) { diff --git a/drivers/scsi/pm8001/pm8001_sas.c b/drivers/scsi/pm8001/pm8001_sas.c index 6f33d821e5453d..48548a95327b5c 100644 --- a/drivers/scsi/pm8001/pm8001_sas.c +++ b/drivers/scsi/pm8001/pm8001_sas.c @@ -98,14 +98,16 @@ void pm8001_tag_init(struct pm8001_hba_info *pm8001_ha) pm8001_tag_free(pm8001_ha, i); } - /** - * pm8001_mem_alloc - allocate memory for pm8001. - * @pdev: pci device. - * @virt_addr: the allocated virtual address - * @pphys_addr_hi: the physical address high byte address. - * @pphys_addr_lo: the physical address low byte address. - * @mem_size: memory size. - */ +/** + * pm8001_mem_alloc - allocate memory for pm8001. + * @pdev: pci device. + * @virt_addr: the allocated virtual address + * @pphys_addr: DMA address for this device + * @pphys_addr_hi: the physical address high byte address. + * @pphys_addr_lo: the physical address low byte address. + * @mem_size: memory size. + * @align: requested byte alignment + */ int pm8001_mem_alloc(struct pci_dev *pdev, void **virt_addr, dma_addr_t *pphys_addr, u32 *pphys_addr_hi, u32 *pphys_addr_lo, u32 mem_size, u32 align) @@ -339,7 +341,7 @@ static int pm8001_task_prep_ssp_tm(struct pm8001_hba_info *pm8001_ha, } /** - * pm8001_task_prep_ssp - the dispatcher function,prepare ssp data for ssp task + * pm8001_task_prep_ssp - the dispatcher function, prepare ssp data for ssp task * @pm8001_ha: our hba card information * @ccb: the ccb which attached to ssp task */ @@ -554,10 +556,10 @@ void pm8001_ccb_task_free(struct pm8001_hba_info *pm8001_ha, pm8001_tag_free(pm8001_ha, ccb_idx); } - /** - * pm8001_alloc_dev - find a empty pm8001_device - * @pm8001_ha: our hba card information - */ +/** + * pm8001_alloc_dev - find a empty pm8001_device + * @pm8001_ha: our hba card information + */ static struct pm8001_device *pm8001_alloc_dev(struct pm8001_hba_info *pm8001_ha) { u32 dev; @@ -705,7 +707,7 @@ static void pm8001_tmf_timedout(struct timer_list *t) * @parameter: ssp task parameter. * * when errors or exception happened, we may want to do something, for example - * abort the issued task which result in this execption, it is done by calling + * abort the issued task which result in this exception, it is done by calling * this function, note it is also with the task execute interface. */ static int pm8001_exec_internal_tmf_task(struct domain_device *dev, @@ -984,11 +986,12 @@ void pm8001_open_reject_retry( } /** - * pm8001_I_T_nexus_reset() - * Standard mandates link reset for ATA (type 0) and hard reset for - * SSP (type 1) , only for RECOVERY - * @dev: the device structure for the device to reset. - */ + * pm8001_I_T_nexus_reset() - reset the initiator/target connection + * @dev: the device structure for the device to reset. + * + * Standard mandates link reset for ATA (type 0) and hard reset for + * SSP (type 1), only for RECOVERY + */ int pm8001_I_T_nexus_reset(struct domain_device *dev) { int rc = TMF_RESP_FUNC_FAILED; diff --git a/drivers/scsi/pm8001/pm80xx_hwi.c b/drivers/scsi/pm8001/pm80xx_hwi.c index 45ecd963997782..6ffe17b849ae84 100644 --- a/drivers/scsi/pm8001/pm80xx_hwi.c +++ b/drivers/scsi/pm8001/pm80xx_hwi.c @@ -140,7 +140,7 @@ ssize_t pm80xx_get_fatal_dump(struct device *cdev, pm8001_ha->fatal_bar_loc = 0; } - /* Read until accum_len is retrived */ + /* Read until accum_len is retrieved */ accum_len = pm8001_mr32(fatal_table_address, MPI_FATAL_EDUMP_TABLE_ACCUM_LEN); /* Determine length of data between previously stored transfer length @@ -1011,7 +1011,7 @@ static int mpi_init_check(struct pm8001_hba_info *pm8001_ha) value); return -EBUSY; } - /* check the MPI-State for initialization upto 100ms*/ + /* check the MPI-State for initialization up to 100ms*/ max_wait_count = 5;/* 100 msec */ do { msleep(FW_READY_INTERVAL); @@ -1093,7 +1093,7 @@ static int init_pci_device_addresses(struct pm8001_hba_info *pm8001_ha) value = pm8001_cr32(pm8001_ha, 0, MSGU_SCRATCH_PAD_0); - /** + /* * lower 26 bits of SCRATCHPAD0 register describes offset within the * PCIe BAR where the MPI configuration table is present */ @@ -1101,7 +1101,7 @@ static int init_pci_device_addresses(struct pm8001_hba_info *pm8001_ha) pm8001_dbg(pm8001_ha, DEV, "Scratchpad 0 Offset: 0x%x value 0x%x\n", offset, value); - /** + /* * Upper 6 bits describe the offset within PCI config space where BAR * is located. */ @@ -1109,7 +1109,7 @@ static int init_pci_device_addresses(struct pm8001_hba_info *pm8001_ha) pcibar = get_pci_bar_index(pcilogic); pm8001_dbg(pm8001_ha, INIT, "Scratchpad 0 PCI BAR: %d\n", pcibar); - /** + /* * Make sure the offset falls inside the ioremapped PCI BAR */ if (offset > pm8001_ha->io_mem[pcibar].memsize) { @@ -1121,7 +1121,7 @@ static int init_pci_device_addresses(struct pm8001_hba_info *pm8001_ha) pm8001_ha->main_cfg_tbl_addr = base_addr = pm8001_ha->io_mem[pcibar].memvirtaddr + offset; - /** + /* * Validate main configuration table address: first DWord should read * "PMCS" */ @@ -1385,7 +1385,7 @@ pm80xx_get_encrypt_info(struct pm8001_hba_info *pm8001_ha) } /** - * pm80xx_encrypt_update - update flash with encryption informtion + * pm80xx_encrypt_update - update flash with encryption information * @pm8001_ha: our hba card information. */ static int pm80xx_encrypt_update(struct pm8001_hba_info *pm8001_ha) @@ -1422,7 +1422,7 @@ static int pm80xx_encrypt_update(struct pm8001_hba_info *pm8001_ha) } /** - * pm80xx_chip_init - the main init function that initialize whole PM8001 chip. + * pm80xx_chip_init - the main init function that initializes whole PM8001 chip. * @pm8001_ha: our hba card information */ static int pm80xx_chip_init(struct pm8001_hba_info *pm8001_ha) @@ -1541,7 +1541,7 @@ static int mpi_uninit_check(struct pm8001_hba_info *pm8001_ha) } /** - * pm80xx_fatal_errors - returns non zero *ONLY* when fatal errors + * pm80xx_fatal_errors - returns non-zero *ONLY* when fatal errors * @pm8001_ha: our hba card information * * Fatal errors are recoverable only after a host reboot. @@ -1576,8 +1576,8 @@ pm80xx_fatal_errors(struct pm8001_hba_info *pm8001_ha) } /** - * pm80xx_chip_soft_rst - soft reset the PM8001 chip, so that the clear all - * the FW register status to the originated status. + * pm80xx_chip_soft_rst - soft reset the PM8001 chip, so that all + * FW register status are reset to the originated status. * @pm8001_ha: our hba card information */ @@ -1895,13 +1895,13 @@ static void pm80xx_send_read_log(struct pm8001_hba_info *pm8001_ha, } /** - * mpi_ssp_completion- process the event that FW response to the SSP request. + * mpi_ssp_completion - process the event that FW response to the SSP request. * @pm8001_ha: our hba card information * @piomb: the message contents of this outbound message. * * When FW has completed a ssp request for example a IO request, after it has - * filled the SG data with the data, it will trigger this event represent - * that he has finished the job,please check the coresponding buffer. + * filled the SG data with the data, it will trigger this event representing + * that he has finished the job; please check the corresponding buffer. * So we will tell the caller who maybe waiting the result to tell upper layer * that the task has been finished. */ @@ -3217,7 +3217,7 @@ mpi_smp_completion(struct pm8001_hba_info *pm8001_ha, void *piomb) } /** - * pm80xx_hw_event_ack_req- For PM8001,some events need to acknowage to FW. + * pm80xx_hw_event_ack_req- For PM8001, some events need to acknowledge to FW. * @pm8001_ha: our hba card information * @Qnum: the outbound queue message number. * @SEA: source of event to ack @@ -3275,7 +3275,7 @@ static void hw_event_port_recover(struct pm8001_hba_info *pm8001_ha, } /** - * hw_event_sas_phy_up -FW tells me a SAS phy up event. + * hw_event_sas_phy_up - FW tells me a SAS phy up event. * @pm8001_ha: our hba card information * @piomb: IO message buffer */ @@ -3353,7 +3353,7 @@ hw_event_sas_phy_up(struct pm8001_hba_info *pm8001_ha, void *piomb) } /** - * hw_event_sata_phy_up -FW tells me a SATA phy up event. + * hw_event_sata_phy_up - FW tells me a SATA phy up event. * @pm8001_ha: our hba card information * @piomb: IO message buffer */ @@ -3400,7 +3400,7 @@ hw_event_sata_phy_up(struct pm8001_hba_info *pm8001_ha, void *piomb) } /** - * hw_event_phy_down -we should notify the libsas the phy is down. + * hw_event_phy_down - we should notify the libsas the phy is down. * @pm8001_ha: our hba card information * @piomb: IO message buffer */ @@ -3500,7 +3500,7 @@ static int mpi_phy_start_resp(struct pm8001_hba_info *pm8001_ha, void *piomb) } /** - * mpi_thermal_hw_event -The hw event has come. + * mpi_thermal_hw_event - a thermal hw event has come. * @pm8001_ha: our hba card information * @piomb: IO message buffer */ @@ -3530,7 +3530,7 @@ static int mpi_thermal_hw_event(struct pm8001_hba_info *pm8001_ha, void *piomb) } /** - * mpi_hw_event -The hw event has come. + * mpi_hw_event - The hw event has come. * @pm8001_ha: our hba card information * @piomb: IO message buffer */ @@ -4025,7 +4025,7 @@ static void process_one_iomb(struct pm8001_hba_info *pm8001_ha, void *piomb) case OPC_OUB_SET_DEV_INFO: pm8001_dbg(pm8001_ha, MSG, "OPC_OUB_SET_DEV_INFO\n"); break; - /* spcv specifc commands */ + /* spcv specific commands */ case OPC_OUB_PHY_START_RESP: pm8001_dbg(pm8001_ha, MSG, "OPC_OUB_PHY_START_RESP opcode:%x\n", opc); @@ -4186,7 +4186,7 @@ static void build_smp_cmd(u32 deviceID, __le32 hTag, } /** - * pm80xx_chip_smp_req - send a SMP task to FW + * pm80xx_chip_smp_req - send an SMP task to FW * @pm8001_ha: our hba card information. * @ccb: the ccb information this request used. */ @@ -4346,7 +4346,7 @@ static int check_enc_sat_cmd(struct sas_task *task) } /** - * pm80xx_chip_ssp_io_req - send a SSP task to FW + * pm80xx_chip_ssp_io_req - send an SSP task to FW * @pm8001_ha: our hba card information. * @ccb: the ccb information this request used. */ @@ -4750,13 +4750,13 @@ pm80xx_chip_phy_start_req(struct pm8001_hba_info *pm8001_ha, u8 phy_id) payload.ase_sh_lm_slr_phyid = cpu_to_le32(SPINHOLD_DISABLE | LINKMODE_AUTO | pm8001_ha->link_rate | phy_id); /* SSC Disable and SAS Analog ST configuration */ - /** + /* payload.ase_sh_lm_slr_phyid = cpu_to_le32(SSC_DISABLE_30 | SAS_ASE | SPINHOLD_DISABLE | LINKMODE_AUTO | LINKRATE_15 | LINKRATE_30 | LINKRATE_60 | phy_id); Have to add "SAS PHY Analog Setup SPASTI 1 Byte" Based on need - **/ + */ payload.sas_identify.dev_type = SAS_END_DEVICE; payload.sas_identify.initiator_bits = SAS_PROTOCOL_ALL; From aa0dc6a73309f1fc779f4560a60ee766a3891adc Mon Sep 17 00:00:00 2001 From: Sreekanth Reddy Date: Wed, 7 Jul 2021 13:47:56 +0530 Subject: [PATCH 257/714] scsi: mpi3mr: Fix W=1 compilation warnings Fix for the following W=1 compilation warning: 'strncpy' output may be truncated copying 16 bytes from a string of length 64 Link: https://lore.kernel.org/r/20210707081756.20922-1-sreekanth.reddy@broadcom.com Reported-by: kernel test robot Signed-off-by: Sreekanth Reddy Signed-off-by: Martin K. Petersen --- drivers/scsi/mpi3mr/mpi3mr_fw.c | 15 +++++++-------- 1 file changed, 7 insertions(+), 8 deletions(-) diff --git a/drivers/scsi/mpi3mr/mpi3mr_fw.c b/drivers/scsi/mpi3mr/mpi3mr_fw.c index 9eceafca59bc97..2dba2b0af16674 100644 --- a/drivers/scsi/mpi3mr/mpi3mr_fw.c +++ b/drivers/scsi/mpi3mr/mpi3mr_fw.c @@ -2607,14 +2607,13 @@ static int mpi3mr_issue_iocinit(struct mpi3mr_ioc *mrioc) goto out; } drv_info->information_length = cpu_to_le32(data_len); - strncpy(drv_info->driver_signature, "Broadcom", sizeof(drv_info->driver_signature)); - strncpy(drv_info->os_name, utsname()->sysname, sizeof(drv_info->os_name)); - drv_info->os_name[sizeof(drv_info->os_name) - 1] = 0; - strncpy(drv_info->os_version, utsname()->release, sizeof(drv_info->os_version)); - drv_info->os_version[sizeof(drv_info->os_version) - 1] = 0; - strncpy(drv_info->driver_name, MPI3MR_DRIVER_NAME, sizeof(drv_info->driver_name)); - strncpy(drv_info->driver_version, MPI3MR_DRIVER_VERSION, sizeof(drv_info->driver_version)); - strncpy(drv_info->driver_release_date, MPI3MR_DRIVER_RELDATE, sizeof(drv_info->driver_release_date)); + strscpy(drv_info->driver_signature, "Broadcom", sizeof(drv_info->driver_signature)); + strscpy(drv_info->os_name, utsname()->sysname, sizeof(drv_info->os_name)); + strscpy(drv_info->os_version, utsname()->release, sizeof(drv_info->os_version)); + strscpy(drv_info->driver_name, MPI3MR_DRIVER_NAME, sizeof(drv_info->driver_name)); + strscpy(drv_info->driver_version, MPI3MR_DRIVER_VERSION, sizeof(drv_info->driver_version)); + strscpy(drv_info->driver_release_date, MPI3MR_DRIVER_RELDATE, + sizeof(drv_info->driver_release_date)); drv_info->driver_capabilities = 0; memcpy((u8 *)&mrioc->driver_info, (u8 *)drv_info, sizeof(mrioc->driver_info)); From 053c16ac89050ef0e8ab9dc1edaf157bf104c8c6 Mon Sep 17 00:00:00 2001 From: Jaegeuk Kim Date: Wed, 30 Jun 2021 17:51:17 -0700 Subject: [PATCH 258/714] scsi: ufs: core: Add missing host_lock in ufshcd_vops_setup_xfer_req() This patch adds a host_lock which existed before on ufshcd_vops_setup_xfer_req(). Link: https://lore.kernel.org/r/20210701005117.3846179-1-jaegeuk@kernel.org Fixes: a45f937110fa ("scsi: ufs: Optimize host lock on transfer requests send/compl paths") Cc: Stanley Chu Cc: Can Guo Cc: Bean Huo Cc: Bart Van Assche Cc: Asutosh Das Reviewed-by: Bart Van Assche Signed-off-by: Jaegeuk Kim Signed-off-by: Martin K. Petersen --- drivers/scsi/ufs/ufshcd.h | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/drivers/scsi/ufs/ufshcd.h b/drivers/scsi/ufs/ufshcd.h index c98d540ac044d0..194755c9ddfeb8 100644 --- a/drivers/scsi/ufs/ufshcd.h +++ b/drivers/scsi/ufs/ufshcd.h @@ -1229,8 +1229,13 @@ static inline int ufshcd_vops_pwr_change_notify(struct ufs_hba *hba, static inline void ufshcd_vops_setup_xfer_req(struct ufs_hba *hba, int tag, bool is_scsi_cmd) { - if (hba->vops && hba->vops->setup_xfer_req) - return hba->vops->setup_xfer_req(hba, tag, is_scsi_cmd); + if (hba->vops && hba->vops->setup_xfer_req) { + unsigned long flags; + + spin_lock_irqsave(hba->host->host_lock, flags); + hba->vops->setup_xfer_req(hba, tag, is_scsi_cmd); + spin_unlock_irqrestore(hba->host->host_lock, flags); + } } static inline void ufshcd_vops_setup_task_mgmt(struct ufs_hba *hba, From e181ad43887c6b6b5995e9b191666f04242d77eb Mon Sep 17 00:00:00 2001 From: "Gustavo A. R. Silva" Date: Mon, 12 Jul 2021 21:24:07 -0500 Subject: [PATCH 259/714] drm/msm: Fix fall-through warning in msm_gem_new_impl() Fix the following fall-through warning: drivers/gpu/drm/msm/msm_gem.c: In function 'msm_gem_new_impl': drivers/gpu/drm/msm/msm_gem.c:1170:6: warning: this statement may fall through [-Wimplicit-fallthrough=] 1170 | if (priv->has_cached_coherent) | ^ drivers/gpu/drm/msm/msm_gem.c:1173:2: note: here 1173 | default: | ^~~~~~~ by replacing the /* fallthrough */ comment with fallthrough; Reported-by: Stephen Rothwell Signed-off-by: Gustavo A. R. Silva --- drivers/gpu/drm/msm/msm_gem.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/msm/msm_gem.c b/drivers/gpu/drm/msm/msm_gem.c index 141178754231ea..1e8a971a86f292 100644 --- a/drivers/gpu/drm/msm/msm_gem.c +++ b/drivers/gpu/drm/msm/msm_gem.c @@ -1169,7 +1169,7 @@ static int msm_gem_new_impl(struct drm_device *dev, case MSM_BO_CACHED_COHERENT: if (priv->has_cached_coherent) break; - /* fallthrough */ + fallthrough; default: DRM_DEV_ERROR(dev->dev, "invalid cache flag: %x\n", (flags & MSM_BO_CACHE_MASK)); From e48a12e546ecbfb0718176037eae0ad60598a29a Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Tue, 13 Jul 2021 06:16:05 +0200 Subject: [PATCH 260/714] jump_labels: Mark __jump_label_transform() as __always_inlined to work around aggressive compiler un-inlining In randconfig testing, certain UBSAN and CC Kconfig combinations with GCC 10.3.0: CONFIG_X86_32=y CONFIG_CC_OPTIMIZE_FOR_SIZE=y CONFIG_UBSAN=y # CONFIG_UBSAN_TRAP is not set # CONFIG_UBSAN_BOUNDS is not set CONFIG_UBSAN_SHIFT=y # CONFIG_UBSAN_DIV_ZERO is not set CONFIG_UBSAN_UNREACHABLE=y CONFIG_UBSAN_BOOL=y # CONFIG_UBSAN_ENUM is not set # CONFIG_UBSAN_ALIGNMENT is not set # CONFIG_UBSAN_SANITIZE_ALL is not set ... produce this build warning (and build error if CONFIG_SECTION_MISMATCH_WARN_ONLY=y is set): WARNING: modpost: vmlinux.o(.text+0x4c1cc): Section mismatch in reference from the function __jump_label_transform() to the function .init.text:text_poke_early() The function __jump_label_transform() references the function __init text_poke_early(). This is often because __jump_label_transform lacks a __init annotation or the annotation of text_poke_early is wrong. ERROR: modpost: Section mismatches detected. The problem is that __jump_label_transform() gets uninlined by GCC, despite there being only a single local scope user of the 'static inline' function. Mark the function __always_inline instead, to work around this compiler bug/artifact. Signed-off-by: Ingo Molnar --- arch/x86/kernel/jump_label.c | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/arch/x86/kernel/jump_label.c b/arch/x86/kernel/jump_label.c index 674906fad43b11..68f091ba84434b 100644 --- a/arch/x86/kernel/jump_label.c +++ b/arch/x86/kernel/jump_label.c @@ -79,9 +79,10 @@ __jump_label_patch(struct jump_entry *entry, enum jump_label_type type) return (struct jump_label_patch){.code = code, .size = size}; } -static inline void __jump_label_transform(struct jump_entry *entry, - enum jump_label_type type, - int init) +static __always_inline void +__jump_label_transform(struct jump_entry *entry, + enum jump_label_type type, + int init) { const struct jump_label_patch jlp = __jump_label_patch(entry, type); From e9ba16e68cce2f85e9f5d2eba5c0453f1a741fd2 Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Sun, 11 Jul 2021 08:26:45 +0200 Subject: [PATCH 261/714] smpboot: Mark idle_init() as __always_inlined to work around aggressive compiler un-inlining While this function is a static inline, and is only used once in local scope, certain Kconfig variations may cause it to be compiled as a standalone function: 89231bf0 : 89231bf0: 83 05 60 d9 45 89 01 addl $0x1,0x8945d960 89231bf7: 55 push %ebp Resulting in this build failure: WARNING: modpost: vmlinux.o(.text.unlikely+0x7fd5): Section mismatch in reference from the function idle_init() to the function .init.text:fork_idle() The function idle_init() references the function __init fork_idle(). This is often because idle_init lacks a __init annotation or the annotation of fork_idle is wrong. ERROR: modpost: Section mismatches detected. Certain USBSAN options x86-32 builds with CONFIG_CC_OPTIMIZE_FOR_SIZE=y seem to be causing this. So mark idle_init() as __always_inline to work around this compiler bug/feature. Signed-off-by: Ingo Molnar --- kernel/smpboot.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/kernel/smpboot.c b/kernel/smpboot.c index e4163042c4d665..21b7953f824215 100644 --- a/kernel/smpboot.c +++ b/kernel/smpboot.c @@ -47,7 +47,7 @@ void __init idle_thread_set_boot_cpu(void) * * Creates the thread if it does not exist. */ -static inline void idle_init(unsigned int cpu) +static inline void __always_inline idle_init(unsigned int cpu) { struct task_struct *tsk = per_cpu(idle_threads, cpu); From 432b52eea3dcf48083bafa4b2b6ef5b054ead609 Mon Sep 17 00:00:00 2001 From: Geert Uytterhoeven Date: Mon, 12 Jul 2021 12:16:57 +0200 Subject: [PATCH 262/714] ARM: shmobile: defconfig: Restore graphical consoles As of commit f611b1e7624ccdbd ("drm: Avoid circular dependencies for CONFIG_FB"), CONFIG_FB is no longer auto-enabled. While CONFIG_FB may be considered unneeded for systems where graphics is provided by a DRM driver, R-Mobile A1 still relies on a frame buffer device driver for graphics support. Restore support for graphics on R-Mobile A1 and graphical consoles on DRM-based systems by explicitly enabling CONFIG_FB in the defconfig for Renesas ARM systems. Fixes: f611b1e7624ccdbd ("drm: Avoid circular dependencies for CONFIG_FB") Signed-off-by: Geert Uytterhoeven Link: https://lore.kernel.org/r/2a4474be1d2c00c6ca97c2714844ea416a9ea9a9.1626084948.git.geert+renesas@glider.be --- arch/arm/configs/shmobile_defconfig | 1 + 1 file changed, 1 insertion(+) diff --git a/arch/arm/configs/shmobile_defconfig b/arch/arm/configs/shmobile_defconfig index 66c8b0980a0a74..d9a27e4e09140b 100644 --- a/arch/arm/configs/shmobile_defconfig +++ b/arch/arm/configs/shmobile_defconfig @@ -135,6 +135,7 @@ CONFIG_DRM_SII902X=y CONFIG_DRM_SIMPLE_BRIDGE=y CONFIG_DRM_I2C_ADV7511=y CONFIG_DRM_I2C_ADV7511_AUDIO=y +CONFIG_FB=y CONFIG_FB_SH_MOBILE_LCDC=y CONFIG_BACKLIGHT_PWM=y CONFIG_BACKLIGHT_AS3711=y From 8b43ced64d2baca72a13caf2a7182f2853e054bd Mon Sep 17 00:00:00 2001 From: Prabhakar Kushwaha Date: Tue, 13 Jul 2021 11:31:56 +0200 Subject: [PATCH 263/714] nvme-tcp: use __dev_get_by_name instead dev_get_by_name for OPT_HOST_IFACE dev_get_by_name() finds network device by name but it also increases the reference count. If a nvme-tcp queue is present and the network device driver is removed before nvme_tcp, we will face the following continuous log: "kernel:unregister_netdevice: waiting for to become free. Usage count = 2" And rmmod further halts. Similar case arises during reboot/shutdown with nvme-tcp queue present and both never completes. To fix this, use __dev_get_by_name() which finds network device by name without increasing any reference counter. Fixes: 3ede8f72a9a2 ("nvme-tcp: allow selecting the network interface for connections") Signed-off-by: Omkar Kulkarni Signed-off-by: Shai Malin Signed-off-by: Prabhakar Kushwaha Reviewed-by: Sagi Grimberg [hch: remove the ->ndev member entirely] Signed-off-by: Christoph Hellwig --- drivers/nvme/host/tcp.c | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/drivers/nvme/host/tcp.c b/drivers/nvme/host/tcp.c index 12acfe05cd68fa..8cb15ee5b249ea 100644 --- a/drivers/nvme/host/tcp.c +++ b/drivers/nvme/host/tcp.c @@ -123,7 +123,6 @@ struct nvme_tcp_ctrl { struct blk_mq_tag_set admin_tag_set; struct sockaddr_storage addr; struct sockaddr_storage src_addr; - struct net_device *ndev; struct nvme_ctrl ctrl; struct work_struct err_work; @@ -2533,8 +2532,7 @@ static struct nvme_ctrl *nvme_tcp_create_ctrl(struct device *dev, } if (opts->mask & NVMF_OPT_HOST_IFACE) { - ctrl->ndev = dev_get_by_name(&init_net, opts->host_iface); - if (!ctrl->ndev) { + if (!__dev_get_by_name(&init_net, opts->host_iface)) { pr_err("invalid interface passed: %s\n", opts->host_iface); ret = -ENODEV; From e4b9852a0f4afe40604afb442e3af4452722050a Mon Sep 17 00:00:00 2001 From: Casey Chen Date: Wed, 7 Jul 2021 14:14:31 -0700 Subject: [PATCH 264/714] nvme-pci: fix multiple races in nvme_setup_io_queues Below two paths could overlap each other if we power off a drive quickly after powering it on. There are multiple races in nvme_setup_io_queues() because of shutdown_lock missing and improper use of NVMEQ_ENABLED bit. nvme_reset_work() nvme_remove() nvme_setup_io_queues() nvme_dev_disable() ... ... A1 clear NVMEQ_ENABLED bit for admin queue lock retry: B1 nvme_suspend_io_queues() A2 pci_free_irq() admin queue B2 nvme_suspend_queue() admin queue A3 pci_free_irq_vectors() nvme_pci_disable() A4 nvme_setup_irqs(); B3 pci_free_irq_vectors() ... unlock A5 queue_request_irq() for admin queue set NVMEQ_ENABLED bit ... nvme_create_io_queues() A6 result = queue_request_irq(); set NVMEQ_ENABLED bit ... fail to allocate enough IO queues: A7 nvme_suspend_io_queues() goto retry If B3 runs in between A1 and A2, it will crash if irqaction haven't been freed by A2. B2 is supposed to free admin queue IRQ but it simply can't fulfill the job as A1 has cleared NVMEQ_ENABLED bit. Fix: combine A1 A2 so IRQ get freed as soon as the NVMEQ_ENABLED bit gets cleared. After solved #1, A2 could race with B3 if A2 is freeing IRQ while B3 is checking irqaction. A3 also could race with B2 if B2 is freeing IRQ while A3 is checking irqaction. Fix: A2 and A3 take lock for mutual exclusion. A3 could race with B3 since they could run free_msi_irqs() in parallel. Fix: A3 takes lock for mutual exclusion. A4 could fail to allocate all needed IRQ vectors if A3 and A4 are interrupted by B3. Fix: A4 takes lock for mutual exclusion. If A5/A6 happened after B2/B1, B3 will crash since irqaction is not NULL. They are just allocated by A5/A6. Fix: Lock queue_request_irq() and setting of NVMEQ_ENABLED bit. A7 could get chance to pci_free_irq() for certain IO queue while B3 is checking irqaction. Fix: A7 takes lock. nvme_dev->online_queues need to be protected by shutdown_lock. Since it is not atomic, both paths could modify it using its own copy. Co-developed-by: Yuanyuan Zhong Signed-off-by: Casey Chen Reviewed-by: Keith Busch Signed-off-by: Christoph Hellwig --- drivers/nvme/host/pci.c | 66 ++++++++++++++++++++++++++++++++++++----- 1 file changed, 58 insertions(+), 8 deletions(-) diff --git a/drivers/nvme/host/pci.c b/drivers/nvme/host/pci.c index d3c5086673bcbb..46698cabef81ac 100644 --- a/drivers/nvme/host/pci.c +++ b/drivers/nvme/host/pci.c @@ -1554,6 +1554,28 @@ static void nvme_init_queue(struct nvme_queue *nvmeq, u16 qid) wmb(); /* ensure the first interrupt sees the initialization */ } +/* + * Try getting shutdown_lock while setting up IO queues. + */ +static int nvme_setup_io_queues_trylock(struct nvme_dev *dev) +{ + /* + * Give up if the lock is being held by nvme_dev_disable. + */ + if (!mutex_trylock(&dev->shutdown_lock)) + return -ENODEV; + + /* + * Controller is in wrong state, fail early. + */ + if (dev->ctrl.state != NVME_CTRL_CONNECTING) { + mutex_unlock(&dev->shutdown_lock); + return -ENODEV; + } + + return 0; +} + static int nvme_create_queue(struct nvme_queue *nvmeq, int qid, bool polled) { struct nvme_dev *dev = nvmeq->dev; @@ -1582,8 +1604,11 @@ static int nvme_create_queue(struct nvme_queue *nvmeq, int qid, bool polled) goto release_cq; nvmeq->cq_vector = vector; - nvme_init_queue(nvmeq, qid); + result = nvme_setup_io_queues_trylock(dev); + if (result) + return result; + nvme_init_queue(nvmeq, qid); if (!polled) { result = queue_request_irq(nvmeq); if (result < 0) @@ -1591,10 +1616,12 @@ static int nvme_create_queue(struct nvme_queue *nvmeq, int qid, bool polled) } set_bit(NVMEQ_ENABLED, &nvmeq->flags); + mutex_unlock(&dev->shutdown_lock); return result; release_sq: dev->online_queues--; + mutex_unlock(&dev->shutdown_lock); adapter_delete_sq(dev, qid); release_cq: adapter_delete_cq(dev, qid); @@ -2167,7 +2194,18 @@ static int nvme_setup_io_queues(struct nvme_dev *dev) if (nr_io_queues == 0) return 0; - clear_bit(NVMEQ_ENABLED, &adminq->flags); + /* + * Free IRQ resources as soon as NVMEQ_ENABLED bit transitions + * from set to unset. If there is a window to it is truely freed, + * pci_free_irq_vectors() jumping into this window will crash. + * And take lock to avoid racing with pci_free_irq_vectors() in + * nvme_dev_disable() path. + */ + result = nvme_setup_io_queues_trylock(dev); + if (result) + return result; + if (test_and_clear_bit(NVMEQ_ENABLED, &adminq->flags)) + pci_free_irq(pdev, 0, adminq); if (dev->cmb_use_sqes) { result = nvme_cmb_qdepth(dev, nr_io_queues, @@ -2183,14 +2221,17 @@ static int nvme_setup_io_queues(struct nvme_dev *dev) result = nvme_remap_bar(dev, size); if (!result) break; - if (!--nr_io_queues) - return -ENOMEM; + if (!--nr_io_queues) { + result = -ENOMEM; + goto out_unlock; + } } while (1); adminq->q_db = dev->dbs; retry: /* Deregister the admin queue's interrupt */ - pci_free_irq(pdev, 0, adminq); + if (test_and_clear_bit(NVMEQ_ENABLED, &adminq->flags)) + pci_free_irq(pdev, 0, adminq); /* * If we enable msix early due to not intx, disable it again before @@ -2199,8 +2240,10 @@ static int nvme_setup_io_queues(struct nvme_dev *dev) pci_free_irq_vectors(pdev); result = nvme_setup_irqs(dev, nr_io_queues); - if (result <= 0) - return -EIO; + if (result <= 0) { + result = -EIO; + goto out_unlock; + } dev->num_vecs = result; result = max(result - 1, 1); @@ -2214,8 +2257,9 @@ static int nvme_setup_io_queues(struct nvme_dev *dev) */ result = queue_request_irq(adminq); if (result) - return result; + goto out_unlock; set_bit(NVMEQ_ENABLED, &adminq->flags); + mutex_unlock(&dev->shutdown_lock); result = nvme_create_io_queues(dev); if (result || dev->online_queues < 2) @@ -2224,6 +2268,9 @@ static int nvme_setup_io_queues(struct nvme_dev *dev) if (dev->online_queues - 1 < dev->max_qid) { nr_io_queues = dev->online_queues - 1; nvme_disable_io_queues(dev); + result = nvme_setup_io_queues_trylock(dev); + if (result) + return result; nvme_suspend_io_queues(dev); goto retry; } @@ -2232,6 +2279,9 @@ static int nvme_setup_io_queues(struct nvme_dev *dev) dev->io_queues[HCTX_TYPE_READ], dev->io_queues[HCTX_TYPE_POLL]); return 0; +out_unlock: + mutex_unlock(&dev->shutdown_lock); + return result; } static void nvme_del_queue_end(struct request *req, blk_status_t error) From 251ef6f71be2adfd09546a26643426fe62585173 Mon Sep 17 00:00:00 2001 From: Casey Chen Date: Wed, 7 Jul 2021 14:14:32 -0700 Subject: [PATCH 265/714] nvme-pci: do not call nvme_dev_remove_admin from nvme_remove nvme_dev_remove_admin could free dev->admin_q and the admin_tagset while they are being accessed by nvme_dev_disable(), which can be called by nvme_reset_work via nvme_remove_dead_ctrl. Commit cb4bfda62afa ("nvme-pci: fix hot removal during error handling") intended to avoid requests being stuck on a removed controller by killing the admin queue. But the later fix c8e9e9b7646e ("nvme-pci: unquiesce admin queue on shutdown"), together with nvme_dev_disable(dev, true) right before nvme_dev_remove_admin() could help dispatch requests and fail them early, so we don't need nvme_dev_remove_admin() any more. Fixes: cb4bfda62afa ("nvme-pci: fix hot removal during error handling") Signed-off-by: Casey Chen Reviewed-by: Keith Busch Signed-off-by: Christoph Hellwig --- drivers/nvme/host/pci.c | 1 - 1 file changed, 1 deletion(-) diff --git a/drivers/nvme/host/pci.c b/drivers/nvme/host/pci.c index 46698cabef81ac..320051f5a3ddca 100644 --- a/drivers/nvme/host/pci.c +++ b/drivers/nvme/host/pci.c @@ -3012,7 +3012,6 @@ static void nvme_remove(struct pci_dev *pdev) if (!pci_device_is_present(pdev)) { nvme_change_ctrl_state(&dev->ctrl, NVME_CTRL_DEAD); nvme_dev_disable(dev, true); - nvme_dev_remove_admin(dev); } flush_work(&dev->ctrl.reset_work); From 52f83955aaf91b22f46765b007b4404ce85b2133 Mon Sep 17 00:00:00 2001 From: Sudeep Holla Date: Mon, 12 Jul 2021 14:08:01 +0100 Subject: [PATCH 266/714] firmware: arm_scmi: Fix kernel doc warnings Kernel doc validation script is unhappy and complains with the below set of warnings. | Function parameter or member 'fast_switch_possible' not described in 'scmi_perf_proto_ops' | Function parameter or member 'power_scale_mw_get' not described in 'scmi_perf_proto_ops' | cannot understand function prototype: 'struct scmi_sensor_reading ' | cannot understand function prototype: 'struct scmi_range_attrs ' | cannot understand function prototype: 'struct scmi_sensor_axis_info ' | cannot understand function prototype: 'struct scmi_sensor_intervals_info ' Fix them adding appropriate documents or missing keywords. Link: https://lore.kernel.org/r/20210712130801.2436492-2-sudeep.holla@arm.com Reviewed-by: Cristian Marussi Signed-off-by: Sudeep Holla --- include/linux/scmi_protocol.h | 14 +++++++++----- 1 file changed, 9 insertions(+), 5 deletions(-) diff --git a/include/linux/scmi_protocol.h b/include/linux/scmi_protocol.h index 79d0a1237e6cab..80e781c51ddc1e 100644 --- a/include/linux/scmi_protocol.h +++ b/include/linux/scmi_protocol.h @@ -101,6 +101,10 @@ struct scmi_clk_proto_ops { * to sustained performance level mapping * @est_power_get: gets the estimated power cost for a given performance domain * at a given frequency + * @fast_switch_possible: indicates if fast DVFS switching is possible or not + * for a given device + * @power_scale_mw_get: indicates if the power values provided are in milliWatts + * or in some other (abstract) scale */ struct scmi_perf_proto_ops { int (*limits_set)(const struct scmi_protocol_handle *ph, u32 domain, @@ -153,7 +157,7 @@ struct scmi_power_proto_ops { }; /** - * scmi_sensor_reading - represent a timestamped read + * struct scmi_sensor_reading - represent a timestamped read * * Used by @reading_get_timestamped method. * @@ -167,7 +171,7 @@ struct scmi_sensor_reading { }; /** - * scmi_range_attrs - specifies a sensor or axis values' range + * struct scmi_range_attrs - specifies a sensor or axis values' range * @min_range: The minimum value which can be represented by the sensor/axis. * @max_range: The maximum value which can be represented by the sensor/axis. */ @@ -177,7 +181,7 @@ struct scmi_range_attrs { }; /** - * scmi_sensor_axis_info - describes one sensor axes + * struct scmi_sensor_axis_info - describes one sensor axes * @id: The axes ID. * @type: Axes type. Chosen amongst one of @enum scmi_sensor_class. * @scale: Power-of-10 multiplier applied to the axis unit. @@ -205,8 +209,8 @@ struct scmi_sensor_axis_info { }; /** - * scmi_sensor_intervals_info - describes number and type of available update - * intervals + * struct scmi_sensor_intervals_info - describes number and type of available + * update intervals * @segmented: Flag for segmented intervals' representation. When True there * will be exactly 3 intervals in @desc, with each entry * representing a member of a segment in this order: From 5ff6319d46cee22c9cd6f39a377e32c444f9a7b0 Mon Sep 17 00:00:00 2001 From: Sudeep Holla Date: Mon, 12 Jul 2021 11:27:48 +0100 Subject: [PATCH 267/714] firmware: arm_scpi: Fix kernel doc warnings Kernel doc validation script is unhappy and complains with the below set of warnings. | Function parameter or member 'device_domain_id' not described in 'scpi_ops' | Function parameter or member 'get_transition_latency' not described in 'scpi_ops' | Function parameter or member 'add_opps_to_device' not described in 'scpi_ops' | Function parameter or member 'sensor_get_capability' not described in 'scpi_ops' | Function parameter or member 'sensor_get_info' not described in 'scpi_ops' | Function parameter or member 'sensor_get_value' not described in 'scpi_ops' | Function parameter or member 'device_get_power_state' not described in 'scpi_ops' | Function parameter or member 'device_set_power_state' not described in 'scpi_ops' Fix them adding appropriate documents or missing keywords. Link: https://lore.kernel.org/r/20210712130801.2436492-1-sudeep.holla@arm.com Reviewed-by: Cristian Marussi Signed-off-by: Sudeep Holla --- include/linux/scpi_protocol.h | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/include/linux/scpi_protocol.h b/include/linux/scpi_protocol.h index afbf8037d8db80..d2176a56828a4e 100644 --- a/include/linux/scpi_protocol.h +++ b/include/linux/scpi_protocol.h @@ -51,6 +51,14 @@ struct scpi_sensor_info { * OPP is an index to the list return by @dvfs_get_info * @dvfs_get_info: returns the DVFS capabilities of the given power * domain. It includes the OPP list and the latency information + * @device_domain_id: gets the scpi domain id for a given device + * @get_transition_latency: gets the DVFS transition latency for a given device + * @add_opps_to_device: adds all the OPPs for a given device + * @sensor_get_capability: get the list of capabilities for the sensors + * @sensor_get_info: get the information of the specified sensor + * @sensor_get_value: gets the current value of the sensor + * @device_get_power_state: gets the power state of a power domain + * @device_set_power_state: sets the power state of a power domain */ struct scpi_ops { u32 (*get_version)(void); From b98cf55ec0bd88bdba725845c743e94ecaf57b7e Mon Sep 17 00:00:00 2001 From: Cristian Marussi Date: Mon, 12 Jul 2021 15:35:04 +0100 Subject: [PATCH 268/714] firmware: arm_scmi: Fix kernel doc warnings about return values Kernel doc validation script still complains about the following: |No description found for return value of 'scmi_get_protocol_device' |No description found for return value of 'scmi_devm_notifier_register' |No description found for return value of 'scmi_devm_notifier_unregister' Fix adding missing Return kernel-doc statements. Link: https://lore.kernel.org/r/20210712143504.33541-1-cristian.marussi@arm.com Signed-off-by: Cristian Marussi Signed-off-by: Sudeep Holla --- drivers/firmware/arm_scmi/driver.c | 2 ++ drivers/firmware/arm_scmi/notify.c | 4 ++++ 2 files changed, 6 insertions(+) diff --git a/drivers/firmware/arm_scmi/driver.c b/drivers/firmware/arm_scmi/driver.c index 36d80661d473d1..1cbd2b06e52322 100644 --- a/drivers/firmware/arm_scmi/driver.c +++ b/drivers/firmware/arm_scmi/driver.c @@ -1138,6 +1138,8 @@ scmi_txrx_setup(struct scmi_info *info, struct device *dev, int prot_id) * @proto_id and @name: if device was still not existent it is created as a * child of the specified SCMI instance @info and its transport properly * initialized as usual. + * + * Return: A properly initialized scmi device, NULL otherwise. */ static inline struct scmi_device * scmi_get_protocol_device(struct device_node *np, struct scmi_info *info, diff --git a/drivers/firmware/arm_scmi/notify.c b/drivers/firmware/arm_scmi/notify.c index d860bebd984a3d..0efd20cd9d69d8 100644 --- a/drivers/firmware/arm_scmi/notify.c +++ b/drivers/firmware/arm_scmi/notify.c @@ -1457,6 +1457,8 @@ static void scmi_devm_release_notifier(struct device *dev, void *res) * * Generic devres managed helper to register a notifier_block against a * protocol event. + * + * Return: 0 on Success */ static int scmi_devm_notifier_register(struct scmi_device *sdev, u8 proto_id, u8 evt_id, @@ -1523,6 +1525,8 @@ static int scmi_devm_notifier_match(struct device *dev, void *res, void *data) * Generic devres managed helper to explicitly un-register a notifier_block * against a protocol event, which was previously registered using the above * @scmi_devm_notifier_register. + * + * Return: 0 on Success */ static int scmi_devm_notifier_unregister(struct scmi_device *sdev, u8 proto_id, u8 evt_id, From 187a002b07e8089f0b5657eafec50b5d05625569 Mon Sep 17 00:00:00 2001 From: Cristian Marussi Date: Mon, 28 Jun 2021 18:00:42 +0100 Subject: [PATCH 269/714] firmware: arm_scmi: Avoid padding in sensor message structure scmi_resp_sensor_reading_complete structure is meant to represent an SCMI asynchronous reading complete message. The readings field with a 64bit type forces padding and breaks reads in scmi_sensor_reading_get. Split it in two adjacent 32bit readings_low/high subfields to avoid the padding within the structure. Alternatively we could to mark the structure packed. Link: https://lore.kernel.org/r/20210628170042.34105-1-cristian.marussi@arm.com Fixes: e2083d3673916 ("firmware: arm_scmi: Add SCMI v3.0 sensors timestamped reads") Signed-off-by: Cristian Marussi Signed-off-by: Sudeep Holla --- drivers/firmware/arm_scmi/sensors.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/drivers/firmware/arm_scmi/sensors.c b/drivers/firmware/arm_scmi/sensors.c index 2c88aa22155972..308471586381f4 100644 --- a/drivers/firmware/arm_scmi/sensors.c +++ b/drivers/firmware/arm_scmi/sensors.c @@ -166,7 +166,8 @@ struct scmi_msg_sensor_reading_get { struct scmi_resp_sensor_reading_complete { __le32 id; - __le64 readings; + __le32 readings_low; + __le32 readings_high; }; struct scmi_sensor_reading_resp { @@ -717,7 +718,8 @@ static int scmi_sensor_reading_get(const struct scmi_protocol_handle *ph, resp = t->rx.buf; if (le32_to_cpu(resp->id) == sensor_id) - *value = get_unaligned_le64(&resp->readings); + *value = + get_unaligned_le64(&resp->readings_low); else ret = -EPROTO; } From bdb8742dc6f7c599c3d61959234fe4c23638727b Mon Sep 17 00:00:00 2001 From: Cristian Marussi Date: Mon, 12 Jul 2021 15:18:18 +0100 Subject: [PATCH 270/714] firmware: arm_scmi: Fix range check for the maximum number of pending messages SCMI message headers carry a sequence number and such field is sized to allow for MSG_TOKEN_MAX distinct numbers; moreover zero is not really an acceptable maximum number of pending in-flight messages. Fix accordingly the checks performed on the value exported by transports in scmi_desc.max_msg Link: https://lore.kernel.org/r/20210712141833.6628-3-cristian.marussi@arm.com Reported-by: Vincent Guittot Signed-off-by: Cristian Marussi [sudeep.holla: updated the patch title and error message] Signed-off-by: Sudeep Holla --- drivers/firmware/arm_scmi/driver.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/drivers/firmware/arm_scmi/driver.c b/drivers/firmware/arm_scmi/driver.c index 1cbd2b06e52322..9b2e8d42a99220 100644 --- a/drivers/firmware/arm_scmi/driver.c +++ b/drivers/firmware/arm_scmi/driver.c @@ -1026,8 +1026,9 @@ static int __scmi_xfer_info_init(struct scmi_info *sinfo, const struct scmi_desc *desc = sinfo->desc; /* Pre-allocated messages, no more than what hdr.seq can support */ - if (WARN_ON(desc->max_msg >= MSG_TOKEN_MAX)) { - dev_err(dev, "Maximum message of %d exceeds supported %ld\n", + if (WARN_ON(!desc->max_msg || desc->max_msg > MSG_TOKEN_MAX)) { + dev_err(dev, + "Invalid maximum messages %d, not in range [1 - %lu]\n", desc->max_msg, MSG_TOKEN_MAX); return -EINVAL; } From 0af778269a522c988ef0b4188556aba97fb420cc Mon Sep 17 00:00:00 2001 From: Zhen Lei Date: Mon, 12 Jul 2021 16:55:44 +0800 Subject: [PATCH 271/714] fbmem: Do not delete the mode that is still in use The execution of fb_delete_videomode() is not based on the result of the previous fbcon_mode_deleted(). As a result, the mode is directly deleted, regardless of whether it is still in use, which may cause UAF. ================================================================== BUG: KASAN: use-after-free in fb_mode_is_equal+0x36e/0x5e0 \ drivers/video/fbdev/core/modedb.c:924 Read of size 4 at addr ffff88807e0ddb1c by task syz-executor.0/18962 CPU: 2 PID: 18962 Comm: syz-executor.0 Not tainted 5.10.45-rc1+ #3 Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS ... Call Trace: __dump_stack lib/dump_stack.c:77 [inline] dump_stack+0x137/0x1be lib/dump_stack.c:118 print_address_description+0x6c/0x640 mm/kasan/report.c:385 __kasan_report mm/kasan/report.c:545 [inline] kasan_report+0x13d/0x1e0 mm/kasan/report.c:562 fb_mode_is_equal+0x36e/0x5e0 drivers/video/fbdev/core/modedb.c:924 fbcon_mode_deleted+0x16a/0x220 drivers/video/fbdev/core/fbcon.c:2746 fb_set_var+0x1e1/0xdb0 drivers/video/fbdev/core/fbmem.c:975 do_fb_ioctl+0x4d9/0x6e0 drivers/video/fbdev/core/fbmem.c:1108 vfs_ioctl fs/ioctl.c:48 [inline] __do_sys_ioctl fs/ioctl.c:753 [inline] __se_sys_ioctl+0xfb/0x170 fs/ioctl.c:739 do_syscall_64+0x2d/0x70 arch/x86/entry/common.c:46 entry_SYSCALL_64_after_hwframe+0x44/0xa9 Freed by task 18960: kasan_save_stack mm/kasan/common.c:48 [inline] kasan_set_track+0x3d/0x70 mm/kasan/common.c:56 kasan_set_free_info+0x17/0x30 mm/kasan/generic.c:355 __kasan_slab_free+0x108/0x140 mm/kasan/common.c:422 slab_free_hook mm/slub.c:1541 [inline] slab_free_freelist_hook+0xd6/0x1a0 mm/slub.c:1574 slab_free mm/slub.c:3139 [inline] kfree+0xca/0x3d0 mm/slub.c:4121 fb_delete_videomode+0x56a/0x820 drivers/video/fbdev/core/modedb.c:1104 fb_set_var+0x1f3/0xdb0 drivers/video/fbdev/core/fbmem.c:978 do_fb_ioctl+0x4d9/0x6e0 drivers/video/fbdev/core/fbmem.c:1108 vfs_ioctl fs/ioctl.c:48 [inline] __do_sys_ioctl fs/ioctl.c:753 [inline] __se_sys_ioctl+0xfb/0x170 fs/ioctl.c:739 do_syscall_64+0x2d/0x70 arch/x86/entry/common.c:46 entry_SYSCALL_64_after_hwframe+0x44/0xa9 Fixes: 13ff178ccd6d ("fbcon: Call fbcon_mode_deleted/new_modelist directly") Signed-off-by: Zhen Lei Cc: # v5.3+ Signed-off-by: Daniel Vetter Link: https://patchwork.freedesktop.org/patch/msgid/20210712085544.2828-1-thunder.leizhen@huawei.com --- drivers/video/fbdev/core/fbmem.c | 12 +++++------- 1 file changed, 5 insertions(+), 7 deletions(-) diff --git a/drivers/video/fbdev/core/fbmem.c b/drivers/video/fbdev/core/fbmem.c index 98f193078c05ae..1c855145711ba7 100644 --- a/drivers/video/fbdev/core/fbmem.c +++ b/drivers/video/fbdev/core/fbmem.c @@ -970,13 +970,11 @@ fb_set_var(struct fb_info *info, struct fb_var_screeninfo *var) fb_var_to_videomode(&mode2, &info->var); /* make sure we don't delete the videomode of current var */ ret = fb_mode_is_equal(&mode1, &mode2); - - if (!ret) - fbcon_mode_deleted(info, &mode1); - - if (!ret) - fb_delete_videomode(&mode1, &info->modelist); - + if (!ret) { + ret = fbcon_mode_deleted(info, &mode1); + if (!ret) + fb_delete_videomode(&mode1, &info->modelist); + } return ret ? -EINVAL : 0; } From 5dd0a6b8582ffbfa88351949d50eccd5b6694ade Mon Sep 17 00:00:00 2001 From: Daniel Borkmann Date: Mon, 12 Jul 2021 22:57:35 +0200 Subject: [PATCH 272/714] bpf: Fix tail_call_reachable rejection for interpreter when jit failed During testing of f263a81451c1 ("bpf: Track subprog poke descriptors correctly and fix use-after-free") under various failure conditions, for example, when jit_subprogs() fails and tries to clean up the program to be run under the interpreter, we ran into the following freeze: [...] #127/8 tailcall_bpf2bpf_3:FAIL [...] [ 92.041251] BUG: KASAN: slab-out-of-bounds in ___bpf_prog_run+0x1b9d/0x2e20 [ 92.042408] Read of size 8 at addr ffff88800da67f68 by task test_progs/682 [ 92.043707] [ 92.044030] CPU: 1 PID: 682 Comm: test_progs Tainted: G O 5.13.0-53301-ge6c08cb33a30-dirty #87 [ 92.045542] Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS 1.13.0-1ubuntu1 04/01/2014 [ 92.046785] Call Trace: [ 92.047171] ? __bpf_prog_run_args64+0xc0/0xc0 [ 92.047773] ? __bpf_prog_run_args32+0x8b/0xb0 [ 92.048389] ? __bpf_prog_run_args64+0xc0/0xc0 [ 92.049019] ? ktime_get+0x117/0x130 [...] // few hundred [similar] lines more [ 92.659025] ? ktime_get+0x117/0x130 [ 92.659845] ? __bpf_prog_run_args64+0xc0/0xc0 [ 92.660738] ? __bpf_prog_run_args32+0x8b/0xb0 [ 92.661528] ? __bpf_prog_run_args64+0xc0/0xc0 [ 92.662378] ? print_usage_bug+0x50/0x50 [ 92.663221] ? print_usage_bug+0x50/0x50 [ 92.664077] ? bpf_ksym_find+0x9c/0xe0 [ 92.664887] ? ktime_get+0x117/0x130 [ 92.665624] ? kernel_text_address+0xf5/0x100 [ 92.666529] ? __kernel_text_address+0xe/0x30 [ 92.667725] ? unwind_get_return_address+0x2f/0x50 [ 92.668854] ? ___bpf_prog_run+0x15d4/0x2e20 [ 92.670185] ? ktime_get+0x117/0x130 [ 92.671130] ? __bpf_prog_run_args64+0xc0/0xc0 [ 92.672020] ? __bpf_prog_run_args32+0x8b/0xb0 [ 92.672860] ? __bpf_prog_run_args64+0xc0/0xc0 [ 92.675159] ? ktime_get+0x117/0x130 [ 92.677074] ? lock_is_held_type+0xd5/0x130 [ 92.678662] ? ___bpf_prog_run+0x15d4/0x2e20 [ 92.680046] ? ktime_get+0x117/0x130 [ 92.681285] ? __bpf_prog_run32+0x6b/0x90 [ 92.682601] ? __bpf_prog_run64+0x90/0x90 [ 92.683636] ? lock_downgrade+0x370/0x370 [ 92.684647] ? mark_held_locks+0x44/0x90 [ 92.685652] ? ktime_get+0x117/0x130 [ 92.686752] ? lockdep_hardirqs_on+0x79/0x100 [ 92.688004] ? ktime_get+0x117/0x130 [ 92.688573] ? __cant_migrate+0x2b/0x80 [ 92.689192] ? bpf_test_run+0x2f4/0x510 [ 92.689869] ? bpf_test_timer_continue+0x1c0/0x1c0 [ 92.690856] ? rcu_read_lock_bh_held+0x90/0x90 [ 92.691506] ? __kasan_slab_alloc+0x61/0x80 [ 92.692128] ? eth_type_trans+0x128/0x240 [ 92.692737] ? __build_skb+0x46/0x50 [ 92.693252] ? bpf_prog_test_run_skb+0x65e/0xc50 [ 92.693954] ? bpf_prog_test_run_raw_tp+0x2d0/0x2d0 [ 92.694639] ? __fget_light+0xa1/0x100 [ 92.695162] ? bpf_prog_inc+0x23/0x30 [ 92.695685] ? __sys_bpf+0xb40/0x2c80 [ 92.696324] ? bpf_link_get_from_fd+0x90/0x90 [ 92.697150] ? mark_held_locks+0x24/0x90 [ 92.698007] ? lockdep_hardirqs_on_prepare+0x124/0x220 [ 92.699045] ? finish_task_switch+0xe6/0x370 [ 92.700072] ? lockdep_hardirqs_on+0x79/0x100 [ 92.701233] ? finish_task_switch+0x11d/0x370 [ 92.702264] ? __switch_to+0x2c0/0x740 [ 92.703148] ? mark_held_locks+0x24/0x90 [ 92.704155] ? __x64_sys_bpf+0x45/0x50 [ 92.705146] ? do_syscall_64+0x35/0x80 [ 92.706953] ? entry_SYSCALL_64_after_hwframe+0x44/0xae [...] Turns out that the program rejection from e411901c0b77 ("bpf: allow for tailcalls in BPF subprograms for x64 JIT") is buggy since env->prog->aux->tail_call_reachable is never true. Commit ebf7d1f508a7 ("bpf, x64: rework pro/epilogue and tailcall handling in JIT") added a tracker into check_max_stack_depth() which propagates the tail_call_reachable condition throughout the subprograms. This info is then assigned to the subprogram's func[i]->aux->tail_call_reachable. However, in the case of the rejection check upon JIT failure, env->prog->aux->tail_call_reachable is used. func[0]->aux->tail_call_reachable which represents the main program's information did not propagate this to the outer env->prog->aux, though. Add this propagation into check_max_stack_depth() where it needs to belong so that the check can be done reliably. Fixes: ebf7d1f508a7 ("bpf, x64: rework pro/epilogue and tailcall handling in JIT") Fixes: e411901c0b77 ("bpf: allow for tailcalls in BPF subprograms for x64 JIT") Co-developed-by: John Fastabend Signed-off-by: Daniel Borkmann Signed-off-by: John Fastabend Signed-off-by: Alexei Starovoitov Acked-by: Maciej Fijalkowski Link: https://lore.kernel.org/bpf/618c34e3163ad1a36b1e82377576a6081e182f25.1626123173.git.daniel@iogearbox.net --- kernel/bpf/verifier.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c index 42a4063de7cd24..9de3c9c3267cb1 100644 --- a/kernel/bpf/verifier.c +++ b/kernel/bpf/verifier.c @@ -3677,6 +3677,8 @@ static int check_max_stack_depth(struct bpf_verifier_env *env) if (tail_call_reachable) for (j = 0; j < frame; j++) subprog[ret_prog[j]].tail_call_reachable = true; + if (subprog[0].tail_call_reachable) + env->prog->aux->tail_call_reachable = true; /* end of for() loop means the last insn of the 'subprog' * was reached. Doesn't matter whether it was JA or EXIT From 5acc7d3e8d342858405fbbc671221f676b547ce7 Mon Sep 17 00:00:00 2001 From: Xuan Zhuo Date: Sat, 10 Jul 2021 11:16:35 +0800 Subject: [PATCH 273/714] xdp, net: Fix use-after-free in bpf_xdp_link_release The problem occurs between dev_get_by_index() and dev_xdp_attach_link(). At this point, dev_xdp_uninstall() is called. Then xdp link will not be detached automatically when dev is released. But link->dev already points to dev, when xdp link is released, dev will still be accessed, but dev has been released. dev_get_by_index() | link->dev = dev | | rtnl_lock() | unregister_netdevice_many() | dev_xdp_uninstall() | rtnl_unlock() rtnl_lock(); | dev_xdp_attach_link() | rtnl_unlock(); | | netdev_run_todo() // dev released bpf_xdp_link_release() | /* access dev. | use-after-free */ | [ 45.966867] BUG: KASAN: use-after-free in bpf_xdp_link_release+0x3b8/0x3d0 [ 45.967619] Read of size 8 at addr ffff00000f9980c8 by task a.out/732 [ 45.968297] [ 45.968502] CPU: 1 PID: 732 Comm: a.out Not tainted 5.13.0+ #22 [ 45.969222] Hardware name: linux,dummy-virt (DT) [ 45.969795] Call trace: [ 45.970106] dump_backtrace+0x0/0x4c8 [ 45.970564] show_stack+0x30/0x40 [ 45.970981] dump_stack_lvl+0x120/0x18c [ 45.971470] print_address_description.constprop.0+0x74/0x30c [ 45.972182] kasan_report+0x1e8/0x200 [ 45.972659] __asan_report_load8_noabort+0x2c/0x50 [ 45.973273] bpf_xdp_link_release+0x3b8/0x3d0 [ 45.973834] bpf_link_free+0xd0/0x188 [ 45.974315] bpf_link_put+0x1d0/0x218 [ 45.974790] bpf_link_release+0x3c/0x58 [ 45.975291] __fput+0x20c/0x7e8 [ 45.975706] ____fput+0x24/0x30 [ 45.976117] task_work_run+0x104/0x258 [ 45.976609] do_notify_resume+0x894/0xaf8 [ 45.977121] work_pending+0xc/0x328 [ 45.977575] [ 45.977775] The buggy address belongs to the page: [ 45.978369] page:fffffc00003e6600 refcount:0 mapcount:0 mapping:0000000000000000 index:0x0 pfn:0x4f998 [ 45.979522] flags: 0x7fffe0000000000(node=0|zone=0|lastcpupid=0x3ffff) [ 45.980349] raw: 07fffe0000000000 fffffc00003e6708 ffff0000dac3c010 0000000000000000 [ 45.981309] raw: 0000000000000000 0000000000000000 00000000ffffffff 0000000000000000 [ 45.982259] page dumped because: kasan: bad access detected [ 45.982948] [ 45.983153] Memory state around the buggy address: [ 45.983753] ffff00000f997f80: fc fc fc fc fc fc fc fc fc fc fc fc fc fc fc fc [ 45.984645] ffff00000f998000: ff ff ff ff ff ff ff ff ff ff ff ff ff ff ff ff [ 45.985533] >ffff00000f998080: ff ff ff ff ff ff ff ff ff ff ff ff ff ff ff ff [ 45.986419] ^ [ 45.987112] ffff00000f998100: ff ff ff ff ff ff ff ff ff ff ff ff ff ff ff ff [ 45.988006] ffff00000f998180: ff ff ff ff ff ff ff ff ff ff ff ff ff ff ff ff [ 45.988895] ================================================================== [ 45.989773] Disabling lock debugging due to kernel taint [ 45.990552] Kernel panic - not syncing: panic_on_warn set ... [ 45.991166] CPU: 1 PID: 732 Comm: a.out Tainted: G B 5.13.0+ #22 [ 45.991929] Hardware name: linux,dummy-virt (DT) [ 45.992448] Call trace: [ 45.992753] dump_backtrace+0x0/0x4c8 [ 45.993208] show_stack+0x30/0x40 [ 45.993627] dump_stack_lvl+0x120/0x18c [ 45.994113] dump_stack+0x1c/0x34 [ 45.994530] panic+0x3a4/0x7d8 [ 45.994930] end_report+0x194/0x198 [ 45.995380] kasan_report+0x134/0x200 [ 45.995850] __asan_report_load8_noabort+0x2c/0x50 [ 45.996453] bpf_xdp_link_release+0x3b8/0x3d0 [ 45.997007] bpf_link_free+0xd0/0x188 [ 45.997474] bpf_link_put+0x1d0/0x218 [ 45.997942] bpf_link_release+0x3c/0x58 [ 45.998429] __fput+0x20c/0x7e8 [ 45.998833] ____fput+0x24/0x30 [ 45.999247] task_work_run+0x104/0x258 [ 45.999731] do_notify_resume+0x894/0xaf8 [ 46.000236] work_pending+0xc/0x328 [ 46.000697] SMP: stopping secondary CPUs [ 46.001226] Dumping ftrace buffer: [ 46.001663] (ftrace buffer empty) [ 46.002110] Kernel Offset: disabled [ 46.002545] CPU features: 0x00000001,23202c00 [ 46.003080] Memory Limit: none Fixes: aa8d3a716b59db6c ("bpf, xdp: Add bpf_link-based XDP attachment API") Reported-by: Abaci Signed-off-by: Xuan Zhuo Signed-off-by: Alexei Starovoitov Reviewed-by: Dust Li Acked-by: Andrii Nakryiko Link: https://lore.kernel.org/bpf/20210710031635.41649-1-xuanzhuo@linux.alibaba.com --- net/core/dev.c | 14 ++++++++++---- 1 file changed, 10 insertions(+), 4 deletions(-) diff --git a/net/core/dev.c b/net/core/dev.c index 64b21f0a20483b..7da8d1215328e0 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -9700,14 +9700,17 @@ int bpf_xdp_link_attach(const union bpf_attr *attr, struct bpf_prog *prog) struct net_device *dev; int err, fd; + rtnl_lock(); dev = dev_get_by_index(net, attr->link_create.target_ifindex); - if (!dev) + if (!dev) { + rtnl_unlock(); return -EINVAL; + } link = kzalloc(sizeof(*link), GFP_USER); if (!link) { err = -ENOMEM; - goto out_put_dev; + goto unlock; } bpf_link_init(&link->link, BPF_LINK_TYPE_XDP, &bpf_xdp_link_lops, prog); @@ -9717,14 +9720,14 @@ int bpf_xdp_link_attach(const union bpf_attr *attr, struct bpf_prog *prog) err = bpf_link_prime(&link->link, &link_primer); if (err) { kfree(link); - goto out_put_dev; + goto unlock; } - rtnl_lock(); err = dev_xdp_attach_link(dev, NULL, link); rtnl_unlock(); if (err) { + link->dev = NULL; bpf_link_cleanup(&link_primer); goto out_put_dev; } @@ -9734,6 +9737,9 @@ int bpf_xdp_link_attach(const union bpf_attr *attr, struct bpf_prog *prog) dev_put(dev); return fd; +unlock: + rtnl_unlock(); + out_put_dev: dev_put(dev); return err; From 8cdd23c23c3d481a43b4aa03dcb5738812831115 Mon Sep 17 00:00:00 2001 From: Nathan Chancellor Date: Mon, 12 Jul 2021 14:46:37 -0700 Subject: [PATCH 274/714] arm64: Restrict ARM64_BTI_KERNEL to clang 12.0.0 and newer Commit 97fed779f2a6 ("arm64: bti: Provide Kconfig for kernel mode BTI") disabled CONFIG_ARM64_BTI_KERNEL when CONFIG_GCOV_KERNEL was enabled and compiling with clang because of warnings that were seen with allmodconfig because LLVM was not emitting PAC/BTI instructions for compiler generated functions: | warning: some functions compiled with BTI and some compiled without BTI | warning: not setting BTI in feature flags This dependency was fine for avoiding the warnings with allmodconfig until commit 51c2ee6d121c ("Kconfig: Introduce ARCH_WANTS_NO_INSTR and CC_HAS_NO_PROFILE_FN_ATTR"), which prevents CONFIG_GCOV_KERNEL from being enabled with clang 12.0.0 or older because those versions do not support the no_profile_instrument_function attribute. As a result, CONFIG_ARM64_BTI_KERNEL gets enabled with allmodconfig and there are more warnings like the ones above due to CONFIG_KASAN, which suffers from the same problem as CONFIG_GCOV_KERNEL. This was most likely not noticed at the time because allmodconfig + CONFIG_GCOV_KERNEL=n was not tested. defconfig + CONFIG_KASAN=y is enough to reproduce the same warnings as above. The root cause of the warnings was resolved in LLVM during the 12.0.0 release so rather than play whack-a-mole with the dependencies, just update CONFIG_ARM64_BTI_KERNEL to require clang 12.0.0, which will have all of the issues ironed out. Link: https://github.com/ClangBuiltLinux/linux/issues/1428 Link: https://github.com/ClangBuiltLinux/continuous-integration2/runs/3010034706?check_suite_focus=true Link: https://github.com/ClangBuiltLinux/continuous-integration2/runs/3010035725?check_suite_focus=true Link: https://github.com/llvm/llvm-project/commit/a88c722e687e6780dcd6a58718350dc76fcc4cc9 Signed-off-by: Nathan Chancellor Reviewed-by: Nick Desaulniers Link: https://lore.kernel.org/r/20210712214636.3134425-1-nathan@kernel.org Signed-off-by: Will Deacon --- arch/arm64/Kconfig | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/arch/arm64/Kconfig b/arch/arm64/Kconfig index e07e7de9ac499c..b5b13a932561fc 100644 --- a/arch/arm64/Kconfig +++ b/arch/arm64/Kconfig @@ -1605,7 +1605,8 @@ config ARM64_BTI_KERNEL depends on CC_HAS_BRANCH_PROT_PAC_RET_BTI # https://gcc.gnu.org/bugzilla/show_bug.cgi?id=94697 depends on !CC_IS_GCC || GCC_VERSION >= 100100 - depends on !(CC_IS_CLANG && GCOV_KERNEL) + # https://github.com/llvm/llvm-project/commit/a88c722e687e6780dcd6a58718350dc76fcc4cc9 + depends on !CC_IS_CLANG || CLANG_VERSION >= 120000 depends on (!FUNCTION_GRAPH_TRACER || DYNAMIC_FTRACE_WITH_REGS) help Build the kernel with Branch Target Identification annotations From 798c511548b946ae9ec123b0dfe197a5f29e63ec Mon Sep 17 00:00:00 2001 From: Jingwen Chen Date: Thu, 1 Jul 2021 10:19:17 +0800 Subject: [PATCH 275/714] drm/amdgpu: SRIOV flr_work should take write_lock [Why] If flr_work takes read_lock, then other threads who takes read_lock can access hardware when host is doing vf flr. [How] flr_work should take write_lock to avoid this case. Signed-off-by: Jingwen Chen Reviewed-by: Monk Liu Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/mxgpu_ai.c | 4 ++-- drivers/gpu/drm/amd/amdgpu/mxgpu_nv.c | 4 ++-- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/mxgpu_ai.c b/drivers/gpu/drm/amd/amdgpu/mxgpu_ai.c index 3ee481557fc974..ff2307d7ee0fa7 100644 --- a/drivers/gpu/drm/amd/amdgpu/mxgpu_ai.c +++ b/drivers/gpu/drm/amd/amdgpu/mxgpu_ai.c @@ -252,7 +252,7 @@ static void xgpu_ai_mailbox_flr_work(struct work_struct *work) * otherwise the mailbox msg will be ruined/reseted by * the VF FLR. */ - if (!down_read_trylock(&adev->reset_sem)) + if (!down_write_trylock(&adev->reset_sem)) return; amdgpu_virt_fini_data_exchange(adev); @@ -268,7 +268,7 @@ static void xgpu_ai_mailbox_flr_work(struct work_struct *work) flr_done: atomic_set(&adev->in_gpu_reset, 0); - up_read(&adev->reset_sem); + up_write(&adev->reset_sem); /* Trigger recovery for world switch failure if no TDR */ if (amdgpu_device_should_recover_gpu(adev) diff --git a/drivers/gpu/drm/amd/amdgpu/mxgpu_nv.c b/drivers/gpu/drm/amd/amdgpu/mxgpu_nv.c index 48e588d3c4098c..9f7aac435d695e 100644 --- a/drivers/gpu/drm/amd/amdgpu/mxgpu_nv.c +++ b/drivers/gpu/drm/amd/amdgpu/mxgpu_nv.c @@ -273,7 +273,7 @@ static void xgpu_nv_mailbox_flr_work(struct work_struct *work) * otherwise the mailbox msg will be ruined/reseted by * the VF FLR. */ - if (!down_read_trylock(&adev->reset_sem)) + if (!down_write_trylock(&adev->reset_sem)) return; amdgpu_virt_fini_data_exchange(adev); @@ -289,7 +289,7 @@ static void xgpu_nv_mailbox_flr_work(struct work_struct *work) flr_done: atomic_set(&adev->in_gpu_reset, 0); - up_read(&adev->reset_sem); + up_write(&adev->reset_sem); /* Trigger recovery for world switch failure if no TDR */ if (amdgpu_device_should_recover_gpu(adev) From 43a44c5322d1030d8f36ad679307c61f5b4e3716 Mon Sep 17 00:00:00 2001 From: Luben Tuikov Date: Fri, 2 Jul 2021 18:35:14 -0400 Subject: [PATCH 276/714] drm/amdgpu: Return error if no RAS In amdgpu_ras_query_error_count() return an error if the device doesn't support RAS. This prevents that function from having to always set the values of the integer pointers (if set), and thus prevents function side effects--always to have to set values of integers if integer pointers set, regardless of whether RAS is supported or not--with this change this side effect is mitigated. Also, if no pointers are set, don't count, since we've no way of reporting the counts. Also, give this function a kernel-doc. Cc: Alexander Deucher Cc: John Clements Cc: Hawking Zhang Reported-by: Tom Rix Fixes: a46751fbcde505 ("drm/amdgpu: Fix RAS function interface") Signed-off-by: Luben Tuikov Reviewed-by: Alexander Deucher Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c | 49 ++++++++++++++++++------- drivers/gpu/drm/amd/amdgpu/amdgpu_ras.h | 6 +-- 2 files changed, 38 insertions(+), 17 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c index c13b02caf8c32a..fc66aca2859446 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c @@ -809,7 +809,7 @@ static int amdgpu_ras_enable_all_features(struct amdgpu_device *adev, /* query/inject/cure begin */ int amdgpu_ras_query_error_status(struct amdgpu_device *adev, - struct ras_query_if *info) + struct ras_query_if *info) { struct ras_manager *obj = amdgpu_ras_find_obj(adev, &info->head); struct ras_err_data err_data = {0, 0, 0, NULL}; @@ -1043,17 +1043,32 @@ int amdgpu_ras_error_inject(struct amdgpu_device *adev, return ret; } -/* get the total error counts on all IPs */ -void amdgpu_ras_query_error_count(struct amdgpu_device *adev, - unsigned long *ce_count, - unsigned long *ue_count) +/** + * amdgpu_ras_query_error_count -- Get error counts of all IPs + * adev: pointer to AMD GPU device + * ce_count: pointer to an integer to be set to the count of correctible errors. + * ue_count: pointer to an integer to be set to the count of uncorrectible + * errors. + * + * If set, @ce_count or @ue_count, count and return the corresponding + * error counts in those integer pointers. Return 0 if the device + * supports RAS. Return -EOPNOTSUPP if the device doesn't support RAS. + */ +int amdgpu_ras_query_error_count(struct amdgpu_device *adev, + unsigned long *ce_count, + unsigned long *ue_count) { struct amdgpu_ras *con = amdgpu_ras_get_context(adev); struct ras_manager *obj; unsigned long ce, ue; if (!adev->ras_enabled || !con) - return; + return -EOPNOTSUPP; + + /* Don't count since no reporting. + */ + if (!ce_count && !ue_count) + return 0; ce = 0; ue = 0; @@ -1061,9 +1076,11 @@ void amdgpu_ras_query_error_count(struct amdgpu_device *adev, struct ras_query_if info = { .head = obj->head, }; + int res; - if (amdgpu_ras_query_error_status(adev, &info)) - return; + res = amdgpu_ras_query_error_status(adev, &info); + if (res) + return res; ce += info.ce_count; ue += info.ue_count; @@ -1074,6 +1091,8 @@ void amdgpu_ras_query_error_count(struct amdgpu_device *adev, if (ue_count) *ue_count = ue; + + return 0; } /* query/inject/cure end */ @@ -2137,9 +2156,10 @@ static void amdgpu_ras_counte_dw(struct work_struct *work) /* Cache new values. */ - amdgpu_ras_query_error_count(adev, &ce_count, &ue_count); - atomic_set(&con->ras_ce_count, ce_count); - atomic_set(&con->ras_ue_count, ue_count); + if (amdgpu_ras_query_error_count(adev, &ce_count, &ue_count) == 0) { + atomic_set(&con->ras_ce_count, ce_count); + atomic_set(&con->ras_ue_count, ue_count); + } pm_runtime_mark_last_busy(dev->dev); Out: @@ -2312,9 +2332,10 @@ int amdgpu_ras_late_init(struct amdgpu_device *adev, /* Those are the cached values at init. */ - amdgpu_ras_query_error_count(adev, &ce_count, &ue_count); - atomic_set(&con->ras_ce_count, ce_count); - atomic_set(&con->ras_ue_count, ue_count); + if (amdgpu_ras_query_error_count(adev, &ce_count, &ue_count) == 0) { + atomic_set(&con->ras_ce_count, ce_count); + atomic_set(&con->ras_ue_count, ue_count); + } return 0; cleanup: diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.h index 256cea5d34f2b6..b504ed8c9b50be 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.h @@ -490,9 +490,9 @@ int amdgpu_ras_request_reset_on_boot(struct amdgpu_device *adev, void amdgpu_ras_resume(struct amdgpu_device *adev); void amdgpu_ras_suspend(struct amdgpu_device *adev); -void amdgpu_ras_query_error_count(struct amdgpu_device *adev, - unsigned long *ce_count, - unsigned long *ue_count); +int amdgpu_ras_query_error_count(struct amdgpu_device *adev, + unsigned long *ce_count, + unsigned long *ue_count); /* error handling functions */ int amdgpu_ras_add_bad_pages(struct amdgpu_device *adev, From e9cfe00ba8bd437da149a3c52712d4a73e249f45 Mon Sep 17 00:00:00 2001 From: Nicholas Kazlauskas Date: Wed, 16 Jun 2021 17:11:12 -0400 Subject: [PATCH 277/714] drm/amd/display: Fix updating infoframe for DCN3.1 eDP [Why] We're only treating TMDS as a valid target for infoframe updates which results in PSR being unable to transition from state 4 to state 5. [How] Also allow infoframe updates for DCN3.1 - following how we handle this path for earlier ASIC as well. Signed-off-by: Nicholas Kazlauskas Reviewed-by: Eric Yang Acked-by: Rodrigo Siqueira Tested-by: Daniel Wheeler Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/display/dc/dcn31/dcn31_hwseq.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/amd/display/dc/dcn31/dcn31_hwseq.c b/drivers/gpu/drm/amd/display/dc/dcn31/dcn31_hwseq.c index fc1fc1a4bf8b6c..836864a5a5dc0b 100644 --- a/drivers/gpu/drm/amd/display/dc/dcn31/dcn31_hwseq.c +++ b/drivers/gpu/drm/amd/display/dc/dcn31/dcn31_hwseq.c @@ -390,7 +390,7 @@ void dcn31_update_info_frame(struct pipe_ctx *pipe_ctx) is_hdmi_tmds = dc_is_hdmi_tmds_signal(pipe_ctx->stream->signal); is_dp = dc_is_dp_signal(pipe_ctx->stream->signal); - if (!is_hdmi_tmds) + if (!is_hdmi_tmds && !is_dp) return; if (is_hdmi_tmds) From dce9d910eb9e54c97e830637f4e3383a11bb7556 Mon Sep 17 00:00:00 2001 From: Wesley Chalmers Date: Wed, 16 Jun 2021 16:11:23 -0400 Subject: [PATCH 278/714] Revert "drm/amd/display: Always write repeater mode regardless of LTTPR" This reverts commit 2b7605d73b97e2fa28e0817242e66ca968d2a7cb Some displays are not lighting up when put in LTTPR Transparent Mode Signed-off-by: Wesley Chalmers Reviewed-by: Jun Lei Acked-by: Rodrigo Siqueira Tested-by: Daniel Wheeler Signed-off-by: Alex Deucher Cc: stable@vger.kernel.org --- drivers/gpu/drm/amd/display/dc/core/dc_link_dp.c | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/drivers/gpu/drm/amd/display/dc/core/dc_link_dp.c b/drivers/gpu/drm/amd/display/dc/core/dc_link_dp.c index b8832bdde2bcab..6860827159ba87 100644 --- a/drivers/gpu/drm/amd/display/dc/core/dc_link_dp.c +++ b/drivers/gpu/drm/amd/display/dc/core/dc_link_dp.c @@ -1620,11 +1620,12 @@ enum dc_status dpcd_configure_lttpr_mode(struct dc_link *link, struct link_train { enum dc_status status = DC_OK; - if (lt_settings->lttpr_mode == LTTPR_MODE_NON_TRANSPARENT) - status = configure_lttpr_mode_non_transparent(link, lt_settings); - else + if (lt_settings->lttpr_mode == LTTPR_MODE_TRANSPARENT) status = configure_lttpr_mode_transparent(link); + else if (lt_settings->lttpr_mode == LTTPR_MODE_NON_TRANSPARENT) + status = configure_lttpr_mode_non_transparent(link, lt_settings); + return status; } From 9849e71ac0793c956883da3ca780484d3a226316 Mon Sep 17 00:00:00 2001 From: Dmytro Laktyushkin Date: Tue, 15 Jun 2021 15:11:31 -0400 Subject: [PATCH 279/714] drm/amd/display: remove faulty assert Signed-off-by: Dmytro Laktyushkin Reviewed-by: Wenjing Liu Acked-by: Rodrigo Siqueira Tested-by: Daniel Wheeler Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/display/dc/core/dc_link_dp.c | 1 - 1 file changed, 1 deletion(-) diff --git a/drivers/gpu/drm/amd/display/dc/core/dc_link_dp.c b/drivers/gpu/drm/amd/display/dc/core/dc_link_dp.c index 6860827159ba87..6da226bf11d596 100644 --- a/drivers/gpu/drm/amd/display/dc/core/dc_link_dp.c +++ b/drivers/gpu/drm/amd/display/dc/core/dc_link_dp.c @@ -1785,7 +1785,6 @@ bool perform_link_training_with_retries( link_enc = stream->link_enc; else link_enc = link->link_enc; - ASSERT(link_enc); /* We need to do this before the link training to ensure the idle pattern in SST * mode will be sent right after the link training From 21cf0293d500f198ad886cbdcd3ff65f82b35edf Mon Sep 17 00:00:00 2001 From: Xiaomeng Hou Date: Thu, 1 Jul 2021 14:09:18 +0800 Subject: [PATCH 280/714] drm/amd/pm: drop smu_v13_0_1.c|h files for yellow carp Since there's nothing special in smu implementation for yellow carp, it's better to reuse the common smu_v13_0 interfaces and drop the specific smu_v13_0_1.c|h files. v2: remove the duplicate register offset and shift mask header files as well. Signed-off-by: Xiaomeng Hou Reviewed-by: Lijo Lazar Reviewed-by: Kevin Wang Signed-off-by: Alex Deucher --- .../include/asic_reg/mp/mp_13_0_1_offset.h | 355 ------------ .../include/asic_reg/mp/mp_13_0_1_sh_mask.h | 531 ------------------ drivers/gpu/drm/amd/pm/inc/smu_v13_0.h | 1 + drivers/gpu/drm/amd/pm/inc/smu_v13_0_1.h | 57 -- drivers/gpu/drm/amd/pm/swsmu/smu13/Makefile | 2 +- .../gpu/drm/amd/pm/swsmu/smu13/smu_v13_0.c | 24 + .../gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_1.c | 311 ---------- .../drm/amd/pm/swsmu/smu13/yellow_carp_ppt.c | 39 +- 8 files changed, 57 insertions(+), 1263 deletions(-) delete mode 100644 drivers/gpu/drm/amd/include/asic_reg/mp/mp_13_0_1_offset.h delete mode 100644 drivers/gpu/drm/amd/include/asic_reg/mp/mp_13_0_1_sh_mask.h delete mode 100644 drivers/gpu/drm/amd/pm/inc/smu_v13_0_1.h delete mode 100644 drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_1.c diff --git a/drivers/gpu/drm/amd/include/asic_reg/mp/mp_13_0_1_offset.h b/drivers/gpu/drm/amd/include/asic_reg/mp/mp_13_0_1_offset.h deleted file mode 100644 index dfacc6b5d89df7..00000000000000 --- a/drivers/gpu/drm/amd/include/asic_reg/mp/mp_13_0_1_offset.h +++ /dev/null @@ -1,355 +0,0 @@ -/* - * Copyright 2020 Advanced Micro Devices, Inc. - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the "Software"), - * to deal in the Software without restriction, including without limitation - * the rights to use, copy, modify, merge, publish, distribute, sublicense, - * and/or sell copies of the Software, and to permit persons to whom the - * Software is furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in - * all copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL - * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR - * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, - * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR - * OTHER DEALINGS IN THE SOFTWARE. - * - * - */ -#ifndef _mp_13_0_1_OFFSET_HEADER -#define _mp_13_0_1_OFFSET_HEADER - - - -// addressBlock: mp_SmuMp0_SmnDec -// base address: 0x0 -#define regMP0_SMN_C2PMSG_32 0x0060 -#define regMP0_SMN_C2PMSG_32_BASE_IDX 0 -#define regMP0_SMN_C2PMSG_33 0x0061 -#define regMP0_SMN_C2PMSG_33_BASE_IDX 0 -#define regMP0_SMN_C2PMSG_34 0x0062 -#define regMP0_SMN_C2PMSG_34_BASE_IDX 0 -#define regMP0_SMN_C2PMSG_35 0x0063 -#define regMP0_SMN_C2PMSG_35_BASE_IDX 0 -#define regMP0_SMN_C2PMSG_36 0x0064 -#define regMP0_SMN_C2PMSG_36_BASE_IDX 0 -#define regMP0_SMN_C2PMSG_37 0x0065 -#define regMP0_SMN_C2PMSG_37_BASE_IDX 0 -#define regMP0_SMN_C2PMSG_38 0x0066 -#define regMP0_SMN_C2PMSG_38_BASE_IDX 0 -#define regMP0_SMN_C2PMSG_39 0x0067 -#define regMP0_SMN_C2PMSG_39_BASE_IDX 0 -#define regMP0_SMN_C2PMSG_40 0x0068 -#define regMP0_SMN_C2PMSG_40_BASE_IDX 0 -#define regMP0_SMN_C2PMSG_41 0x0069 -#define regMP0_SMN_C2PMSG_41_BASE_IDX 0 -#define regMP0_SMN_C2PMSG_42 0x006a -#define regMP0_SMN_C2PMSG_42_BASE_IDX 0 -#define regMP0_SMN_C2PMSG_43 0x006b -#define regMP0_SMN_C2PMSG_43_BASE_IDX 0 -#define regMP0_SMN_C2PMSG_44 0x006c -#define regMP0_SMN_C2PMSG_44_BASE_IDX 0 -#define regMP0_SMN_C2PMSG_45 0x006d -#define regMP0_SMN_C2PMSG_45_BASE_IDX 0 -#define regMP0_SMN_C2PMSG_46 0x006e -#define regMP0_SMN_C2PMSG_46_BASE_IDX 0 -#define regMP0_SMN_C2PMSG_47 0x006f -#define regMP0_SMN_C2PMSG_47_BASE_IDX 0 -#define regMP0_SMN_C2PMSG_48 0x0070 -#define regMP0_SMN_C2PMSG_48_BASE_IDX 0 -#define regMP0_SMN_C2PMSG_49 0x0071 -#define regMP0_SMN_C2PMSG_49_BASE_IDX 0 -#define regMP0_SMN_C2PMSG_50 0x0072 -#define regMP0_SMN_C2PMSG_50_BASE_IDX 0 -#define regMP0_SMN_C2PMSG_51 0x0073 -#define regMP0_SMN_C2PMSG_51_BASE_IDX 0 -#define regMP0_SMN_C2PMSG_52 0x0074 -#define regMP0_SMN_C2PMSG_52_BASE_IDX 0 -#define regMP0_SMN_C2PMSG_53 0x0075 -#define regMP0_SMN_C2PMSG_53_BASE_IDX 0 -#define regMP0_SMN_C2PMSG_54 0x0076 -#define regMP0_SMN_C2PMSG_54_BASE_IDX 0 -#define regMP0_SMN_C2PMSG_55 0x0077 -#define regMP0_SMN_C2PMSG_55_BASE_IDX 0 -#define regMP0_SMN_C2PMSG_56 0x0078 -#define regMP0_SMN_C2PMSG_56_BASE_IDX 0 -#define regMP0_SMN_C2PMSG_57 0x0079 -#define regMP0_SMN_C2PMSG_57_BASE_IDX 0 -#define regMP0_SMN_C2PMSG_58 0x007a -#define regMP0_SMN_C2PMSG_58_BASE_IDX 0 -#define regMP0_SMN_C2PMSG_59 0x007b -#define regMP0_SMN_C2PMSG_59_BASE_IDX 0 -#define regMP0_SMN_C2PMSG_60 0x007c -#define regMP0_SMN_C2PMSG_60_BASE_IDX 0 -#define regMP0_SMN_C2PMSG_61 0x007d -#define regMP0_SMN_C2PMSG_61_BASE_IDX 0 -#define regMP0_SMN_C2PMSG_62 0x007e -#define regMP0_SMN_C2PMSG_62_BASE_IDX 0 -#define regMP0_SMN_C2PMSG_63 0x007f -#define regMP0_SMN_C2PMSG_63_BASE_IDX 0 -#define regMP0_SMN_C2PMSG_64 0x0080 -#define regMP0_SMN_C2PMSG_64_BASE_IDX 0 -#define regMP0_SMN_C2PMSG_65 0x0081 -#define regMP0_SMN_C2PMSG_65_BASE_IDX 0 -#define regMP0_SMN_C2PMSG_66 0x0082 -#define regMP0_SMN_C2PMSG_66_BASE_IDX 0 -#define regMP0_SMN_C2PMSG_67 0x0083 -#define regMP0_SMN_C2PMSG_67_BASE_IDX 0 -#define regMP0_SMN_C2PMSG_68 0x0084 -#define regMP0_SMN_C2PMSG_68_BASE_IDX 0 -#define regMP0_SMN_C2PMSG_69 0x0085 -#define regMP0_SMN_C2PMSG_69_BASE_IDX 0 -#define regMP0_SMN_C2PMSG_70 0x0086 -#define regMP0_SMN_C2PMSG_70_BASE_IDX 0 -#define regMP0_SMN_C2PMSG_71 0x0087 -#define regMP0_SMN_C2PMSG_71_BASE_IDX 0 -#define regMP0_SMN_C2PMSG_72 0x0088 -#define regMP0_SMN_C2PMSG_72_BASE_IDX 0 -#define regMP0_SMN_C2PMSG_73 0x0089 -#define regMP0_SMN_C2PMSG_73_BASE_IDX 0 -#define regMP0_SMN_C2PMSG_74 0x008a -#define regMP0_SMN_C2PMSG_74_BASE_IDX 0 -#define regMP0_SMN_C2PMSG_75 0x008b -#define regMP0_SMN_C2PMSG_75_BASE_IDX 0 -#define regMP0_SMN_C2PMSG_76 0x008c -#define regMP0_SMN_C2PMSG_76_BASE_IDX 0 -#define regMP0_SMN_C2PMSG_77 0x008d -#define regMP0_SMN_C2PMSG_77_BASE_IDX 0 -#define regMP0_SMN_C2PMSG_78 0x008e -#define regMP0_SMN_C2PMSG_78_BASE_IDX 0 -#define regMP0_SMN_C2PMSG_79 0x008f -#define regMP0_SMN_C2PMSG_79_BASE_IDX 0 -#define regMP0_SMN_C2PMSG_80 0x0090 -#define regMP0_SMN_C2PMSG_80_BASE_IDX 0 -#define regMP0_SMN_C2PMSG_81 0x0091 -#define regMP0_SMN_C2PMSG_81_BASE_IDX 0 -#define regMP0_SMN_C2PMSG_82 0x0092 -#define regMP0_SMN_C2PMSG_82_BASE_IDX 0 -#define regMP0_SMN_C2PMSG_83 0x0093 -#define regMP0_SMN_C2PMSG_83_BASE_IDX 0 -#define regMP0_SMN_C2PMSG_84 0x0094 -#define regMP0_SMN_C2PMSG_84_BASE_IDX 0 -#define regMP0_SMN_C2PMSG_85 0x0095 -#define regMP0_SMN_C2PMSG_85_BASE_IDX 0 -#define regMP0_SMN_C2PMSG_86 0x0096 -#define regMP0_SMN_C2PMSG_86_BASE_IDX 0 -#define regMP0_SMN_C2PMSG_87 0x0097 -#define regMP0_SMN_C2PMSG_87_BASE_IDX 0 -#define regMP0_SMN_C2PMSG_88 0x0098 -#define regMP0_SMN_C2PMSG_88_BASE_IDX 0 -#define regMP0_SMN_C2PMSG_89 0x0099 -#define regMP0_SMN_C2PMSG_89_BASE_IDX 0 -#define regMP0_SMN_C2PMSG_90 0x009a -#define regMP0_SMN_C2PMSG_90_BASE_IDX 0 -#define regMP0_SMN_C2PMSG_91 0x009b -#define regMP0_SMN_C2PMSG_91_BASE_IDX 0 -#define regMP0_SMN_C2PMSG_92 0x009c -#define regMP0_SMN_C2PMSG_92_BASE_IDX 0 -#define regMP0_SMN_C2PMSG_93 0x009d -#define regMP0_SMN_C2PMSG_93_BASE_IDX 0 -#define regMP0_SMN_C2PMSG_94 0x009e -#define regMP0_SMN_C2PMSG_94_BASE_IDX 0 -#define regMP0_SMN_C2PMSG_95 0x009f -#define regMP0_SMN_C2PMSG_95_BASE_IDX 0 -#define regMP0_SMN_C2PMSG_96 0x00a0 -#define regMP0_SMN_C2PMSG_96_BASE_IDX 0 -#define regMP0_SMN_C2PMSG_97 0x00a1 -#define regMP0_SMN_C2PMSG_97_BASE_IDX 0 -#define regMP0_SMN_C2PMSG_98 0x00a2 -#define regMP0_SMN_C2PMSG_98_BASE_IDX 0 -#define regMP0_SMN_C2PMSG_99 0x00a3 -#define regMP0_SMN_C2PMSG_99_BASE_IDX 0 -#define regMP0_SMN_C2PMSG_100 0x00a4 -#define regMP0_SMN_C2PMSG_100_BASE_IDX 0 -#define regMP0_SMN_C2PMSG_101 0x00a5 -#define regMP0_SMN_C2PMSG_101_BASE_IDX 0 -#define regMP0_SMN_C2PMSG_102 0x00a6 -#define regMP0_SMN_C2PMSG_102_BASE_IDX 0 -#define regMP0_SMN_C2PMSG_103 0x00a7 -#define regMP0_SMN_C2PMSG_103_BASE_IDX 0 -#define regMP0_SMN_IH_CREDIT 0x00c1 -#define regMP0_SMN_IH_CREDIT_BASE_IDX 0 -#define regMP0_SMN_IH_SW_INT 0x00c2 -#define regMP0_SMN_IH_SW_INT_BASE_IDX 0 -#define regMP0_SMN_IH_SW_INT_CTRL 0x00c3 -#define regMP0_SMN_IH_SW_INT_CTRL_BASE_IDX 0 - - -// addressBlock: mp_SmuMp1_SmnDec -// base address: 0x0 -#define regMP1_SMN_C2PMSG_32 0x0260 -#define regMP1_SMN_C2PMSG_32_BASE_IDX 0 -#define regMP1_SMN_C2PMSG_33 0x0261 -#define regMP1_SMN_C2PMSG_33_BASE_IDX 0 -#define regMP1_SMN_C2PMSG_34 0x0262 -#define regMP1_SMN_C2PMSG_34_BASE_IDX 0 -#define regMP1_SMN_C2PMSG_35 0x0263 -#define regMP1_SMN_C2PMSG_35_BASE_IDX 0 -#define regMP1_SMN_C2PMSG_36 0x0264 -#define regMP1_SMN_C2PMSG_36_BASE_IDX 0 -#define regMP1_SMN_C2PMSG_37 0x0265 -#define regMP1_SMN_C2PMSG_37_BASE_IDX 0 -#define regMP1_SMN_C2PMSG_38 0x0266 -#define regMP1_SMN_C2PMSG_38_BASE_IDX 0 -#define regMP1_SMN_C2PMSG_39 0x0267 -#define regMP1_SMN_C2PMSG_39_BASE_IDX 0 -#define regMP1_SMN_C2PMSG_40 0x0268 -#define regMP1_SMN_C2PMSG_40_BASE_IDX 0 -#define regMP1_SMN_C2PMSG_41 0x0269 -#define regMP1_SMN_C2PMSG_41_BASE_IDX 0 -#define regMP1_SMN_C2PMSG_42 0x026a -#define regMP1_SMN_C2PMSG_42_BASE_IDX 0 -#define regMP1_SMN_C2PMSG_43 0x026b -#define regMP1_SMN_C2PMSG_43_BASE_IDX 0 -#define regMP1_SMN_C2PMSG_44 0x026c -#define regMP1_SMN_C2PMSG_44_BASE_IDX 0 -#define regMP1_SMN_C2PMSG_45 0x026d -#define regMP1_SMN_C2PMSG_45_BASE_IDX 0 -#define regMP1_SMN_C2PMSG_46 0x026e -#define regMP1_SMN_C2PMSG_46_BASE_IDX 0 -#define regMP1_SMN_C2PMSG_47 0x026f -#define regMP1_SMN_C2PMSG_47_BASE_IDX 0 -#define regMP1_SMN_C2PMSG_48 0x0270 -#define regMP1_SMN_C2PMSG_48_BASE_IDX 0 -#define regMP1_SMN_C2PMSG_49 0x0271 -#define regMP1_SMN_C2PMSG_49_BASE_IDX 0 -#define regMP1_SMN_C2PMSG_50 0x0272 -#define regMP1_SMN_C2PMSG_50_BASE_IDX 0 -#define regMP1_SMN_C2PMSG_51 0x0273 -#define regMP1_SMN_C2PMSG_51_BASE_IDX 0 -#define regMP1_SMN_C2PMSG_52 0x0274 -#define regMP1_SMN_C2PMSG_52_BASE_IDX 0 -#define regMP1_SMN_C2PMSG_53 0x0275 -#define regMP1_SMN_C2PMSG_53_BASE_IDX 0 -#define regMP1_SMN_C2PMSG_54 0x0276 -#define regMP1_SMN_C2PMSG_54_BASE_IDX 0 -#define regMP1_SMN_C2PMSG_55 0x0277 -#define regMP1_SMN_C2PMSG_55_BASE_IDX 0 -#define regMP1_SMN_C2PMSG_56 0x0278 -#define regMP1_SMN_C2PMSG_56_BASE_IDX 0 -#define regMP1_SMN_C2PMSG_57 0x0279 -#define regMP1_SMN_C2PMSG_57_BASE_IDX 0 -#define regMP1_SMN_C2PMSG_58 0x027a -#define regMP1_SMN_C2PMSG_58_BASE_IDX 0 -#define regMP1_SMN_C2PMSG_59 0x027b -#define regMP1_SMN_C2PMSG_59_BASE_IDX 0 -#define regMP1_SMN_C2PMSG_60 0x027c -#define regMP1_SMN_C2PMSG_60_BASE_IDX 0 -#define regMP1_SMN_C2PMSG_61 0x027d -#define regMP1_SMN_C2PMSG_61_BASE_IDX 0 -#define regMP1_SMN_C2PMSG_62 0x027e -#define regMP1_SMN_C2PMSG_62_BASE_IDX 0 -#define regMP1_SMN_C2PMSG_63 0x027f -#define regMP1_SMN_C2PMSG_63_BASE_IDX 0 -#define regMP1_SMN_C2PMSG_64 0x0280 -#define regMP1_SMN_C2PMSG_64_BASE_IDX 0 -#define regMP1_SMN_C2PMSG_65 0x0281 -#define regMP1_SMN_C2PMSG_65_BASE_IDX 0 -#define regMP1_SMN_C2PMSG_66 0x0282 -#define regMP1_SMN_C2PMSG_66_BASE_IDX 0 -#define regMP1_SMN_C2PMSG_67 0x0283 -#define regMP1_SMN_C2PMSG_67_BASE_IDX 0 -#define regMP1_SMN_C2PMSG_68 0x0284 -#define regMP1_SMN_C2PMSG_68_BASE_IDX 0 -#define regMP1_SMN_C2PMSG_69 0x0285 -#define regMP1_SMN_C2PMSG_69_BASE_IDX 0 -#define regMP1_SMN_C2PMSG_70 0x0286 -#define regMP1_SMN_C2PMSG_70_BASE_IDX 0 -#define regMP1_SMN_C2PMSG_71 0x0287 -#define regMP1_SMN_C2PMSG_71_BASE_IDX 0 -#define regMP1_SMN_C2PMSG_72 0x0288 -#define regMP1_SMN_C2PMSG_72_BASE_IDX 0 -#define regMP1_SMN_C2PMSG_73 0x0289 -#define regMP1_SMN_C2PMSG_73_BASE_IDX 0 -#define regMP1_SMN_C2PMSG_74 0x028a -#define regMP1_SMN_C2PMSG_74_BASE_IDX 0 -#define regMP1_SMN_C2PMSG_75 0x028b -#define regMP1_SMN_C2PMSG_75_BASE_IDX 0 -#define regMP1_SMN_C2PMSG_76 0x028c -#define regMP1_SMN_C2PMSG_76_BASE_IDX 0 -#define regMP1_SMN_C2PMSG_77 0x028d -#define regMP1_SMN_C2PMSG_77_BASE_IDX 0 -#define regMP1_SMN_C2PMSG_78 0x028e -#define regMP1_SMN_C2PMSG_78_BASE_IDX 0 -#define regMP1_SMN_C2PMSG_79 0x028f -#define regMP1_SMN_C2PMSG_79_BASE_IDX 0 -#define regMP1_SMN_C2PMSG_80 0x0290 -#define regMP1_SMN_C2PMSG_80_BASE_IDX 0 -#define regMP1_SMN_C2PMSG_81 0x0291 -#define regMP1_SMN_C2PMSG_81_BASE_IDX 0 -#define regMP1_SMN_C2PMSG_82 0x0292 -#define regMP1_SMN_C2PMSG_82_BASE_IDX 0 -#define regMP1_SMN_C2PMSG_83 0x0293 -#define regMP1_SMN_C2PMSG_83_BASE_IDX 0 -#define regMP1_SMN_C2PMSG_84 0x0294 -#define regMP1_SMN_C2PMSG_84_BASE_IDX 0 -#define regMP1_SMN_C2PMSG_85 0x0295 -#define regMP1_SMN_C2PMSG_85_BASE_IDX 0 -#define regMP1_SMN_C2PMSG_86 0x0296 -#define regMP1_SMN_C2PMSG_86_BASE_IDX 0 -#define regMP1_SMN_C2PMSG_87 0x0297 -#define regMP1_SMN_C2PMSG_87_BASE_IDX 0 -#define regMP1_SMN_C2PMSG_88 0x0298 -#define regMP1_SMN_C2PMSG_88_BASE_IDX 0 -#define regMP1_SMN_C2PMSG_89 0x0299 -#define regMP1_SMN_C2PMSG_89_BASE_IDX 0 -#define regMP1_SMN_C2PMSG_90 0x029a -#define regMP1_SMN_C2PMSG_90_BASE_IDX 0 -#define regMP1_SMN_C2PMSG_91 0x029b -#define regMP1_SMN_C2PMSG_91_BASE_IDX 0 -#define regMP1_SMN_C2PMSG_92 0x029c -#define regMP1_SMN_C2PMSG_92_BASE_IDX 0 -#define regMP1_SMN_C2PMSG_93 0x029d -#define regMP1_SMN_C2PMSG_93_BASE_IDX 0 -#define regMP1_SMN_C2PMSG_94 0x029e -#define regMP1_SMN_C2PMSG_94_BASE_IDX 0 -#define regMP1_SMN_C2PMSG_95 0x029f -#define regMP1_SMN_C2PMSG_95_BASE_IDX 0 -#define regMP1_SMN_C2PMSG_96 0x02a0 -#define regMP1_SMN_C2PMSG_96_BASE_IDX 0 -#define regMP1_SMN_C2PMSG_97 0x02a1 -#define regMP1_SMN_C2PMSG_97_BASE_IDX 0 -#define regMP1_SMN_C2PMSG_98 0x02a2 -#define regMP1_SMN_C2PMSG_98_BASE_IDX 0 -#define regMP1_SMN_C2PMSG_99 0x02a3 -#define regMP1_SMN_C2PMSG_99_BASE_IDX 0 -#define regMP1_SMN_C2PMSG_100 0x02a4 -#define regMP1_SMN_C2PMSG_100_BASE_IDX 0 -#define regMP1_SMN_C2PMSG_101 0x02a5 -#define regMP1_SMN_C2PMSG_101_BASE_IDX 0 -#define regMP1_SMN_C2PMSG_102 0x02a6 -#define regMP1_SMN_C2PMSG_102_BASE_IDX 0 -#define regMP1_SMN_C2PMSG_103 0x02a7 -#define regMP1_SMN_C2PMSG_103_BASE_IDX 0 -#define regMP1_SMN_IH_CREDIT 0x02c1 -#define regMP1_SMN_IH_CREDIT_BASE_IDX 0 -#define regMP1_SMN_IH_SW_INT 0x02c2 -#define regMP1_SMN_IH_SW_INT_BASE_IDX 0 -#define regMP1_SMN_IH_SW_INT_CTRL 0x02c3 -#define regMP1_SMN_IH_SW_INT_CTRL_BASE_IDX 0 -#define regMP1_SMN_FPS_CNT 0x02c4 -#define regMP1_SMN_FPS_CNT_BASE_IDX 0 -#define regMP1_SMN_EXT_SCRATCH0 0x0340 -#define regMP1_SMN_EXT_SCRATCH0_BASE_IDX 0 -#define regMP1_SMN_EXT_SCRATCH1 0x0341 -#define regMP1_SMN_EXT_SCRATCH1_BASE_IDX 0 -#define regMP1_SMN_EXT_SCRATCH2 0x0342 -#define regMP1_SMN_EXT_SCRATCH2_BASE_IDX 0 -#define regMP1_SMN_EXT_SCRATCH3 0x0343 -#define regMP1_SMN_EXT_SCRATCH3_BASE_IDX 0 -#define regMP1_SMN_EXT_SCRATCH4 0x0344 -#define regMP1_SMN_EXT_SCRATCH4_BASE_IDX 0 -#define regMP1_SMN_EXT_SCRATCH5 0x0345 -#define regMP1_SMN_EXT_SCRATCH5_BASE_IDX 0 -#define regMP1_SMN_EXT_SCRATCH6 0x0346 -#define regMP1_SMN_EXT_SCRATCH6_BASE_IDX 0 -#define regMP1_SMN_EXT_SCRATCH7 0x0347 -#define regMP1_SMN_EXT_SCRATCH7_BASE_IDX 0 - - -#endif diff --git a/drivers/gpu/drm/amd/include/asic_reg/mp/mp_13_0_1_sh_mask.h b/drivers/gpu/drm/amd/include/asic_reg/mp/mp_13_0_1_sh_mask.h deleted file mode 100644 index 2d5e8b58e69371..00000000000000 --- a/drivers/gpu/drm/amd/include/asic_reg/mp/mp_13_0_1_sh_mask.h +++ /dev/null @@ -1,531 +0,0 @@ -/* - * Copyright 2020 Advanced Micro Devices, Inc. - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the "Software"), - * to deal in the Software without restriction, including without limitation - * the rights to use, copy, modify, merge, publish, distribute, sublicense, - * and/or sell copies of the Software, and to permit persons to whom the - * Software is furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in - * all copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL - * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR - * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, - * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR - * OTHER DEALINGS IN THE SOFTWARE. - * - * - */ -#ifndef _mp_13_0_1_SH_MASK_HEADER -#define _mp_13_0_1_SH_MASK_HEADER - - -// addressBlock: mp_SmuMp0_SmnDec -//MP0_SMN_C2PMSG_32 -#define MP0_SMN_C2PMSG_32__CONTENT__SHIFT 0x0 -#define MP0_SMN_C2PMSG_32__CONTENT_MASK 0xFFFFFFFFL -//MP0_SMN_C2PMSG_33 -#define MP0_SMN_C2PMSG_33__CONTENT__SHIFT 0x0 -#define MP0_SMN_C2PMSG_33__CONTENT_MASK 0xFFFFFFFFL -//MP0_SMN_C2PMSG_34 -#define MP0_SMN_C2PMSG_34__CONTENT__SHIFT 0x0 -#define MP0_SMN_C2PMSG_34__CONTENT_MASK 0xFFFFFFFFL -//MP0_SMN_C2PMSG_35 -#define MP0_SMN_C2PMSG_35__CONTENT__SHIFT 0x0 -#define MP0_SMN_C2PMSG_35__CONTENT_MASK 0xFFFFFFFFL -//MP0_SMN_C2PMSG_36 -#define MP0_SMN_C2PMSG_36__CONTENT__SHIFT 0x0 -#define MP0_SMN_C2PMSG_36__CONTENT_MASK 0xFFFFFFFFL -//MP0_SMN_C2PMSG_37 -#define MP0_SMN_C2PMSG_37__CONTENT__SHIFT 0x0 -#define MP0_SMN_C2PMSG_37__CONTENT_MASK 0xFFFFFFFFL -//MP0_SMN_C2PMSG_38 -#define MP0_SMN_C2PMSG_38__CONTENT__SHIFT 0x0 -#define MP0_SMN_C2PMSG_38__CONTENT_MASK 0xFFFFFFFFL -//MP0_SMN_C2PMSG_39 -#define MP0_SMN_C2PMSG_39__CONTENT__SHIFT 0x0 -#define MP0_SMN_C2PMSG_39__CONTENT_MASK 0xFFFFFFFFL -//MP0_SMN_C2PMSG_40 -#define MP0_SMN_C2PMSG_40__CONTENT__SHIFT 0x0 -#define MP0_SMN_C2PMSG_40__CONTENT_MASK 0xFFFFFFFFL -//MP0_SMN_C2PMSG_41 -#define MP0_SMN_C2PMSG_41__CONTENT__SHIFT 0x0 -#define MP0_SMN_C2PMSG_41__CONTENT_MASK 0xFFFFFFFFL -//MP0_SMN_C2PMSG_42 -#define MP0_SMN_C2PMSG_42__CONTENT__SHIFT 0x0 -#define MP0_SMN_C2PMSG_42__CONTENT_MASK 0xFFFFFFFFL -//MP0_SMN_C2PMSG_43 -#define MP0_SMN_C2PMSG_43__CONTENT__SHIFT 0x0 -#define MP0_SMN_C2PMSG_43__CONTENT_MASK 0xFFFFFFFFL -//MP0_SMN_C2PMSG_44 -#define MP0_SMN_C2PMSG_44__CONTENT__SHIFT 0x0 -#define MP0_SMN_C2PMSG_44__CONTENT_MASK 0xFFFFFFFFL -//MP0_SMN_C2PMSG_45 -#define MP0_SMN_C2PMSG_45__CONTENT__SHIFT 0x0 -#define MP0_SMN_C2PMSG_45__CONTENT_MASK 0xFFFFFFFFL -//MP0_SMN_C2PMSG_46 -#define MP0_SMN_C2PMSG_46__CONTENT__SHIFT 0x0 -#define MP0_SMN_C2PMSG_46__CONTENT_MASK 0xFFFFFFFFL -//MP0_SMN_C2PMSG_47 -#define MP0_SMN_C2PMSG_47__CONTENT__SHIFT 0x0 -#define MP0_SMN_C2PMSG_47__CONTENT_MASK 0xFFFFFFFFL -//MP0_SMN_C2PMSG_48 -#define MP0_SMN_C2PMSG_48__CONTENT__SHIFT 0x0 -#define MP0_SMN_C2PMSG_48__CONTENT_MASK 0xFFFFFFFFL -//MP0_SMN_C2PMSG_49 -#define MP0_SMN_C2PMSG_49__CONTENT__SHIFT 0x0 -#define MP0_SMN_C2PMSG_49__CONTENT_MASK 0xFFFFFFFFL -//MP0_SMN_C2PMSG_50 -#define MP0_SMN_C2PMSG_50__CONTENT__SHIFT 0x0 -#define MP0_SMN_C2PMSG_50__CONTENT_MASK 0xFFFFFFFFL -//MP0_SMN_C2PMSG_51 -#define MP0_SMN_C2PMSG_51__CONTENT__SHIFT 0x0 -#define MP0_SMN_C2PMSG_51__CONTENT_MASK 0xFFFFFFFFL -//MP0_SMN_C2PMSG_52 -#define MP0_SMN_C2PMSG_52__CONTENT__SHIFT 0x0 -#define MP0_SMN_C2PMSG_52__CONTENT_MASK 0xFFFFFFFFL -//MP0_SMN_C2PMSG_53 -#define MP0_SMN_C2PMSG_53__CONTENT__SHIFT 0x0 -#define MP0_SMN_C2PMSG_53__CONTENT_MASK 0xFFFFFFFFL -//MP0_SMN_C2PMSG_54 -#define MP0_SMN_C2PMSG_54__CONTENT__SHIFT 0x0 -#define MP0_SMN_C2PMSG_54__CONTENT_MASK 0xFFFFFFFFL -//MP0_SMN_C2PMSG_55 -#define MP0_SMN_C2PMSG_55__CONTENT__SHIFT 0x0 -#define MP0_SMN_C2PMSG_55__CONTENT_MASK 0xFFFFFFFFL -//MP0_SMN_C2PMSG_56 -#define MP0_SMN_C2PMSG_56__CONTENT__SHIFT 0x0 -#define MP0_SMN_C2PMSG_56__CONTENT_MASK 0xFFFFFFFFL -//MP0_SMN_C2PMSG_57 -#define MP0_SMN_C2PMSG_57__CONTENT__SHIFT 0x0 -#define MP0_SMN_C2PMSG_57__CONTENT_MASK 0xFFFFFFFFL -//MP0_SMN_C2PMSG_58 -#define MP0_SMN_C2PMSG_58__CONTENT__SHIFT 0x0 -#define MP0_SMN_C2PMSG_58__CONTENT_MASK 0xFFFFFFFFL -//MP0_SMN_C2PMSG_59 -#define MP0_SMN_C2PMSG_59__CONTENT__SHIFT 0x0 -#define MP0_SMN_C2PMSG_59__CONTENT_MASK 0xFFFFFFFFL -//MP0_SMN_C2PMSG_60 -#define MP0_SMN_C2PMSG_60__CONTENT__SHIFT 0x0 -#define MP0_SMN_C2PMSG_60__CONTENT_MASK 0xFFFFFFFFL -//MP0_SMN_C2PMSG_61 -#define MP0_SMN_C2PMSG_61__CONTENT__SHIFT 0x0 -#define MP0_SMN_C2PMSG_61__CONTENT_MASK 0xFFFFFFFFL -//MP0_SMN_C2PMSG_62 -#define MP0_SMN_C2PMSG_62__CONTENT__SHIFT 0x0 -#define MP0_SMN_C2PMSG_62__CONTENT_MASK 0xFFFFFFFFL -//MP0_SMN_C2PMSG_63 -#define MP0_SMN_C2PMSG_63__CONTENT__SHIFT 0x0 -#define MP0_SMN_C2PMSG_63__CONTENT_MASK 0xFFFFFFFFL -//MP0_SMN_C2PMSG_64 -#define MP0_SMN_C2PMSG_64__CONTENT__SHIFT 0x0 -#define MP0_SMN_C2PMSG_64__CONTENT_MASK 0xFFFFFFFFL -//MP0_SMN_C2PMSG_65 -#define MP0_SMN_C2PMSG_65__CONTENT__SHIFT 0x0 -#define MP0_SMN_C2PMSG_65__CONTENT_MASK 0xFFFFFFFFL -//MP0_SMN_C2PMSG_66 -#define MP0_SMN_C2PMSG_66__CONTENT__SHIFT 0x0 -#define MP0_SMN_C2PMSG_66__CONTENT_MASK 0xFFFFFFFFL -//MP0_SMN_C2PMSG_67 -#define MP0_SMN_C2PMSG_67__CONTENT__SHIFT 0x0 -#define MP0_SMN_C2PMSG_67__CONTENT_MASK 0xFFFFFFFFL -//MP0_SMN_C2PMSG_68 -#define MP0_SMN_C2PMSG_68__CONTENT__SHIFT 0x0 -#define MP0_SMN_C2PMSG_68__CONTENT_MASK 0xFFFFFFFFL -//MP0_SMN_C2PMSG_69 -#define MP0_SMN_C2PMSG_69__CONTENT__SHIFT 0x0 -#define MP0_SMN_C2PMSG_69__CONTENT_MASK 0xFFFFFFFFL -//MP0_SMN_C2PMSG_70 -#define MP0_SMN_C2PMSG_70__CONTENT__SHIFT 0x0 -#define MP0_SMN_C2PMSG_70__CONTENT_MASK 0xFFFFFFFFL -//MP0_SMN_C2PMSG_71 -#define MP0_SMN_C2PMSG_71__CONTENT__SHIFT 0x0 -#define MP0_SMN_C2PMSG_71__CONTENT_MASK 0xFFFFFFFFL -//MP0_SMN_C2PMSG_72 -#define MP0_SMN_C2PMSG_72__CONTENT__SHIFT 0x0 -#define MP0_SMN_C2PMSG_72__CONTENT_MASK 0xFFFFFFFFL -//MP0_SMN_C2PMSG_73 -#define MP0_SMN_C2PMSG_73__CONTENT__SHIFT 0x0 -#define MP0_SMN_C2PMSG_73__CONTENT_MASK 0xFFFFFFFFL -//MP0_SMN_C2PMSG_74 -#define MP0_SMN_C2PMSG_74__CONTENT__SHIFT 0x0 -#define MP0_SMN_C2PMSG_74__CONTENT_MASK 0xFFFFFFFFL -//MP0_SMN_C2PMSG_75 -#define MP0_SMN_C2PMSG_75__CONTENT__SHIFT 0x0 -#define MP0_SMN_C2PMSG_75__CONTENT_MASK 0xFFFFFFFFL -//MP0_SMN_C2PMSG_76 -#define MP0_SMN_C2PMSG_76__CONTENT__SHIFT 0x0 -#define MP0_SMN_C2PMSG_76__CONTENT_MASK 0xFFFFFFFFL -//MP0_SMN_C2PMSG_77 -#define MP0_SMN_C2PMSG_77__CONTENT__SHIFT 0x0 -#define MP0_SMN_C2PMSG_77__CONTENT_MASK 0xFFFFFFFFL -//MP0_SMN_C2PMSG_78 -#define MP0_SMN_C2PMSG_78__CONTENT__SHIFT 0x0 -#define MP0_SMN_C2PMSG_78__CONTENT_MASK 0xFFFFFFFFL -//MP0_SMN_C2PMSG_79 -#define MP0_SMN_C2PMSG_79__CONTENT__SHIFT 0x0 -#define MP0_SMN_C2PMSG_79__CONTENT_MASK 0xFFFFFFFFL -//MP0_SMN_C2PMSG_80 -#define MP0_SMN_C2PMSG_80__CONTENT__SHIFT 0x0 -#define MP0_SMN_C2PMSG_80__CONTENT_MASK 0xFFFFFFFFL -//MP0_SMN_C2PMSG_81 -#define MP0_SMN_C2PMSG_81__CONTENT__SHIFT 0x0 -#define MP0_SMN_C2PMSG_81__CONTENT_MASK 0xFFFFFFFFL -//MP0_SMN_C2PMSG_82 -#define MP0_SMN_C2PMSG_82__CONTENT__SHIFT 0x0 -#define MP0_SMN_C2PMSG_82__CONTENT_MASK 0xFFFFFFFFL -//MP0_SMN_C2PMSG_83 -#define MP0_SMN_C2PMSG_83__CONTENT__SHIFT 0x0 -#define MP0_SMN_C2PMSG_83__CONTENT_MASK 0xFFFFFFFFL -//MP0_SMN_C2PMSG_84 -#define MP0_SMN_C2PMSG_84__CONTENT__SHIFT 0x0 -#define MP0_SMN_C2PMSG_84__CONTENT_MASK 0xFFFFFFFFL -//MP0_SMN_C2PMSG_85 -#define MP0_SMN_C2PMSG_85__CONTENT__SHIFT 0x0 -#define MP0_SMN_C2PMSG_85__CONTENT_MASK 0xFFFFFFFFL -//MP0_SMN_C2PMSG_86 -#define MP0_SMN_C2PMSG_86__CONTENT__SHIFT 0x0 -#define MP0_SMN_C2PMSG_86__CONTENT_MASK 0xFFFFFFFFL -//MP0_SMN_C2PMSG_87 -#define MP0_SMN_C2PMSG_87__CONTENT__SHIFT 0x0 -#define MP0_SMN_C2PMSG_87__CONTENT_MASK 0xFFFFFFFFL -//MP0_SMN_C2PMSG_88 -#define MP0_SMN_C2PMSG_88__CONTENT__SHIFT 0x0 -#define MP0_SMN_C2PMSG_88__CONTENT_MASK 0xFFFFFFFFL -//MP0_SMN_C2PMSG_89 -#define MP0_SMN_C2PMSG_89__CONTENT__SHIFT 0x0 -#define MP0_SMN_C2PMSG_89__CONTENT_MASK 0xFFFFFFFFL -//MP0_SMN_C2PMSG_90 -#define MP0_SMN_C2PMSG_90__CONTENT__SHIFT 0x0 -#define MP0_SMN_C2PMSG_90__CONTENT_MASK 0xFFFFFFFFL -//MP0_SMN_C2PMSG_91 -#define MP0_SMN_C2PMSG_91__CONTENT__SHIFT 0x0 -#define MP0_SMN_C2PMSG_91__CONTENT_MASK 0xFFFFFFFFL -//MP0_SMN_C2PMSG_92 -#define MP0_SMN_C2PMSG_92__CONTENT__SHIFT 0x0 -#define MP0_SMN_C2PMSG_92__CONTENT_MASK 0xFFFFFFFFL -//MP0_SMN_C2PMSG_93 -#define MP0_SMN_C2PMSG_93__CONTENT__SHIFT 0x0 -#define MP0_SMN_C2PMSG_93__CONTENT_MASK 0xFFFFFFFFL -//MP0_SMN_C2PMSG_94 -#define MP0_SMN_C2PMSG_94__CONTENT__SHIFT 0x0 -#define MP0_SMN_C2PMSG_94__CONTENT_MASK 0xFFFFFFFFL -//MP0_SMN_C2PMSG_95 -#define MP0_SMN_C2PMSG_95__CONTENT__SHIFT 0x0 -#define MP0_SMN_C2PMSG_95__CONTENT_MASK 0xFFFFFFFFL -//MP0_SMN_C2PMSG_96 -#define MP0_SMN_C2PMSG_96__CONTENT__SHIFT 0x0 -#define MP0_SMN_C2PMSG_96__CONTENT_MASK 0xFFFFFFFFL -//MP0_SMN_C2PMSG_97 -#define MP0_SMN_C2PMSG_97__CONTENT__SHIFT 0x0 -#define MP0_SMN_C2PMSG_97__CONTENT_MASK 0xFFFFFFFFL -//MP0_SMN_C2PMSG_98 -#define MP0_SMN_C2PMSG_98__CONTENT__SHIFT 0x0 -#define MP0_SMN_C2PMSG_98__CONTENT_MASK 0xFFFFFFFFL -//MP0_SMN_C2PMSG_99 -#define MP0_SMN_C2PMSG_99__CONTENT__SHIFT 0x0 -#define MP0_SMN_C2PMSG_99__CONTENT_MASK 0xFFFFFFFFL -//MP0_SMN_C2PMSG_100 -#define MP0_SMN_C2PMSG_100__CONTENT__SHIFT 0x0 -#define MP0_SMN_C2PMSG_100__CONTENT_MASK 0xFFFFFFFFL -//MP0_SMN_C2PMSG_101 -#define MP0_SMN_C2PMSG_101__CONTENT__SHIFT 0x0 -#define MP0_SMN_C2PMSG_101__CONTENT_MASK 0xFFFFFFFFL -//MP0_SMN_C2PMSG_102 -#define MP0_SMN_C2PMSG_102__CONTENT__SHIFT 0x0 -#define MP0_SMN_C2PMSG_102__CONTENT_MASK 0xFFFFFFFFL -//MP0_SMN_C2PMSG_103 -#define MP0_SMN_C2PMSG_103__CONTENT__SHIFT 0x0 -#define MP0_SMN_C2PMSG_103__CONTENT_MASK 0xFFFFFFFFL -//MP0_SMN_IH_CREDIT -#define MP0_SMN_IH_CREDIT__CREDIT_VALUE__SHIFT 0x0 -#define MP0_SMN_IH_CREDIT__CLIENT_ID__SHIFT 0x10 -#define MP0_SMN_IH_CREDIT__CREDIT_VALUE_MASK 0x00000003L -#define MP0_SMN_IH_CREDIT__CLIENT_ID_MASK 0x00FF0000L -//MP0_SMN_IH_SW_INT -#define MP0_SMN_IH_SW_INT__ID__SHIFT 0x0 -#define MP0_SMN_IH_SW_INT__VALID__SHIFT 0x8 -#define MP0_SMN_IH_SW_INT__ID_MASK 0x000000FFL -#define MP0_SMN_IH_SW_INT__VALID_MASK 0x00000100L -//MP0_SMN_IH_SW_INT_CTRL -#define MP0_SMN_IH_SW_INT_CTRL__INT_MASK__SHIFT 0x0 -#define MP0_SMN_IH_SW_INT_CTRL__INT_ACK__SHIFT 0x8 -#define MP0_SMN_IH_SW_INT_CTRL__INT_MASK_MASK 0x00000001L -#define MP0_SMN_IH_SW_INT_CTRL__INT_ACK_MASK 0x00000100L - - -// addressBlock: mp_SmuMp1Pub_CruDec -//MP1_FIRMWARE_FLAGS -#define MP1_FIRMWARE_FLAGS__INTERRUPTS_ENABLED__SHIFT 0x0 -#define MP1_FIRMWARE_FLAGS__RESERVED__SHIFT 0x1 -#define MP1_FIRMWARE_FLAGS__INTERRUPTS_ENABLED_MASK 0x00000001L -#define MP1_FIRMWARE_FLAGS__RESERVED_MASK 0xFFFFFFFEL - - -// addressBlock: mp_SmuMp1_SmnDec -//MP1_SMN_C2PMSG_32 -#define MP1_SMN_C2PMSG_32__CONTENT__SHIFT 0x0 -#define MP1_SMN_C2PMSG_32__CONTENT_MASK 0xFFFFFFFFL -//MP1_SMN_C2PMSG_33 -#define MP1_SMN_C2PMSG_33__CONTENT__SHIFT 0x0 -#define MP1_SMN_C2PMSG_33__CONTENT_MASK 0xFFFFFFFFL -//MP1_SMN_C2PMSG_34 -#define MP1_SMN_C2PMSG_34__CONTENT__SHIFT 0x0 -#define MP1_SMN_C2PMSG_34__CONTENT_MASK 0xFFFFFFFFL -//MP1_SMN_C2PMSG_35 -#define MP1_SMN_C2PMSG_35__CONTENT__SHIFT 0x0 -#define MP1_SMN_C2PMSG_35__CONTENT_MASK 0xFFFFFFFFL -//MP1_SMN_C2PMSG_36 -#define MP1_SMN_C2PMSG_36__CONTENT__SHIFT 0x0 -#define MP1_SMN_C2PMSG_36__CONTENT_MASK 0xFFFFFFFFL -//MP1_SMN_C2PMSG_37 -#define MP1_SMN_C2PMSG_37__CONTENT__SHIFT 0x0 -#define MP1_SMN_C2PMSG_37__CONTENT_MASK 0xFFFFFFFFL -//MP1_SMN_C2PMSG_38 -#define MP1_SMN_C2PMSG_38__CONTENT__SHIFT 0x0 -#define MP1_SMN_C2PMSG_38__CONTENT_MASK 0xFFFFFFFFL -//MP1_SMN_C2PMSG_39 -#define MP1_SMN_C2PMSG_39__CONTENT__SHIFT 0x0 -#define MP1_SMN_C2PMSG_39__CONTENT_MASK 0xFFFFFFFFL -//MP1_SMN_C2PMSG_40 -#define MP1_SMN_C2PMSG_40__CONTENT__SHIFT 0x0 -#define MP1_SMN_C2PMSG_40__CONTENT_MASK 0xFFFFFFFFL -//MP1_SMN_C2PMSG_41 -#define MP1_SMN_C2PMSG_41__CONTENT__SHIFT 0x0 -#define MP1_SMN_C2PMSG_41__CONTENT_MASK 0xFFFFFFFFL -//MP1_SMN_C2PMSG_42 -#define MP1_SMN_C2PMSG_42__CONTENT__SHIFT 0x0 -#define MP1_SMN_C2PMSG_42__CONTENT_MASK 0xFFFFFFFFL -//MP1_SMN_C2PMSG_43 -#define MP1_SMN_C2PMSG_43__CONTENT__SHIFT 0x0 -#define MP1_SMN_C2PMSG_43__CONTENT_MASK 0xFFFFFFFFL -//MP1_SMN_C2PMSG_44 -#define MP1_SMN_C2PMSG_44__CONTENT__SHIFT 0x0 -#define MP1_SMN_C2PMSG_44__CONTENT_MASK 0xFFFFFFFFL -//MP1_SMN_C2PMSG_45 -#define MP1_SMN_C2PMSG_45__CONTENT__SHIFT 0x0 -#define MP1_SMN_C2PMSG_45__CONTENT_MASK 0xFFFFFFFFL -//MP1_SMN_C2PMSG_46 -#define MP1_SMN_C2PMSG_46__CONTENT__SHIFT 0x0 -#define MP1_SMN_C2PMSG_46__CONTENT_MASK 0xFFFFFFFFL -//MP1_SMN_C2PMSG_47 -#define MP1_SMN_C2PMSG_47__CONTENT__SHIFT 0x0 -#define MP1_SMN_C2PMSG_47__CONTENT_MASK 0xFFFFFFFFL -//MP1_SMN_C2PMSG_48 -#define MP1_SMN_C2PMSG_48__CONTENT__SHIFT 0x0 -#define MP1_SMN_C2PMSG_48__CONTENT_MASK 0xFFFFFFFFL -//MP1_SMN_C2PMSG_49 -#define MP1_SMN_C2PMSG_49__CONTENT__SHIFT 0x0 -#define MP1_SMN_C2PMSG_49__CONTENT_MASK 0xFFFFFFFFL -//MP1_SMN_C2PMSG_50 -#define MP1_SMN_C2PMSG_50__CONTENT__SHIFT 0x0 -#define MP1_SMN_C2PMSG_50__CONTENT_MASK 0xFFFFFFFFL -//MP1_SMN_C2PMSG_51 -#define MP1_SMN_C2PMSG_51__CONTENT__SHIFT 0x0 -#define MP1_SMN_C2PMSG_51__CONTENT_MASK 0xFFFFFFFFL -//MP1_SMN_C2PMSG_52 -#define MP1_SMN_C2PMSG_52__CONTENT__SHIFT 0x0 -#define MP1_SMN_C2PMSG_52__CONTENT_MASK 0xFFFFFFFFL -//MP1_SMN_C2PMSG_53 -#define MP1_SMN_C2PMSG_53__CONTENT__SHIFT 0x0 -#define MP1_SMN_C2PMSG_53__CONTENT_MASK 0xFFFFFFFFL -//MP1_SMN_C2PMSG_54 -#define MP1_SMN_C2PMSG_54__CONTENT__SHIFT 0x0 -#define MP1_SMN_C2PMSG_54__CONTENT_MASK 0xFFFFFFFFL -//MP1_SMN_C2PMSG_55 -#define MP1_SMN_C2PMSG_55__CONTENT__SHIFT 0x0 -#define MP1_SMN_C2PMSG_55__CONTENT_MASK 0xFFFFFFFFL -//MP1_SMN_C2PMSG_56 -#define MP1_SMN_C2PMSG_56__CONTENT__SHIFT 0x0 -#define MP1_SMN_C2PMSG_56__CONTENT_MASK 0xFFFFFFFFL -//MP1_SMN_C2PMSG_57 -#define MP1_SMN_C2PMSG_57__CONTENT__SHIFT 0x0 -#define MP1_SMN_C2PMSG_57__CONTENT_MASK 0xFFFFFFFFL -//MP1_SMN_C2PMSG_58 -#define MP1_SMN_C2PMSG_58__CONTENT__SHIFT 0x0 -#define MP1_SMN_C2PMSG_58__CONTENT_MASK 0xFFFFFFFFL -//MP1_SMN_C2PMSG_59 -#define MP1_SMN_C2PMSG_59__CONTENT__SHIFT 0x0 -#define MP1_SMN_C2PMSG_59__CONTENT_MASK 0xFFFFFFFFL -//MP1_SMN_C2PMSG_60 -#define MP1_SMN_C2PMSG_60__CONTENT__SHIFT 0x0 -#define MP1_SMN_C2PMSG_60__CONTENT_MASK 0xFFFFFFFFL -//MP1_SMN_C2PMSG_61 -#define MP1_SMN_C2PMSG_61__CONTENT__SHIFT 0x0 -#define MP1_SMN_C2PMSG_61__CONTENT_MASK 0xFFFFFFFFL -//MP1_SMN_C2PMSG_62 -#define MP1_SMN_C2PMSG_62__CONTENT__SHIFT 0x0 -#define MP1_SMN_C2PMSG_62__CONTENT_MASK 0xFFFFFFFFL -//MP1_SMN_C2PMSG_63 -#define MP1_SMN_C2PMSG_63__CONTENT__SHIFT 0x0 -#define MP1_SMN_C2PMSG_63__CONTENT_MASK 0xFFFFFFFFL -//MP1_SMN_C2PMSG_64 -#define MP1_SMN_C2PMSG_64__CONTENT__SHIFT 0x0 -#define MP1_SMN_C2PMSG_64__CONTENT_MASK 0xFFFFFFFFL -//MP1_SMN_C2PMSG_65 -#define MP1_SMN_C2PMSG_65__CONTENT__SHIFT 0x0 -#define MP1_SMN_C2PMSG_65__CONTENT_MASK 0xFFFFFFFFL -//MP1_SMN_C2PMSG_66 -#define MP1_SMN_C2PMSG_66__CONTENT__SHIFT 0x0 -#define MP1_SMN_C2PMSG_66__CONTENT_MASK 0xFFFFFFFFL -//MP1_SMN_C2PMSG_67 -#define MP1_SMN_C2PMSG_67__CONTENT__SHIFT 0x0 -#define MP1_SMN_C2PMSG_67__CONTENT_MASK 0xFFFFFFFFL -//MP1_SMN_C2PMSG_68 -#define MP1_SMN_C2PMSG_68__CONTENT__SHIFT 0x0 -#define MP1_SMN_C2PMSG_68__CONTENT_MASK 0xFFFFFFFFL -//MP1_SMN_C2PMSG_69 -#define MP1_SMN_C2PMSG_69__CONTENT__SHIFT 0x0 -#define MP1_SMN_C2PMSG_69__CONTENT_MASK 0xFFFFFFFFL -//MP1_SMN_C2PMSG_70 -#define MP1_SMN_C2PMSG_70__CONTENT__SHIFT 0x0 -#define MP1_SMN_C2PMSG_70__CONTENT_MASK 0xFFFFFFFFL -//MP1_SMN_C2PMSG_71 -#define MP1_SMN_C2PMSG_71__CONTENT__SHIFT 0x0 -#define MP1_SMN_C2PMSG_71__CONTENT_MASK 0xFFFFFFFFL -//MP1_SMN_C2PMSG_72 -#define MP1_SMN_C2PMSG_72__CONTENT__SHIFT 0x0 -#define MP1_SMN_C2PMSG_72__CONTENT_MASK 0xFFFFFFFFL -//MP1_SMN_C2PMSG_73 -#define MP1_SMN_C2PMSG_73__CONTENT__SHIFT 0x0 -#define MP1_SMN_C2PMSG_73__CONTENT_MASK 0xFFFFFFFFL -//MP1_SMN_C2PMSG_74 -#define MP1_SMN_C2PMSG_74__CONTENT__SHIFT 0x0 -#define MP1_SMN_C2PMSG_74__CONTENT_MASK 0xFFFFFFFFL -//MP1_SMN_C2PMSG_75 -#define MP1_SMN_C2PMSG_75__CONTENT__SHIFT 0x0 -#define MP1_SMN_C2PMSG_75__CONTENT_MASK 0xFFFFFFFFL -//MP1_SMN_C2PMSG_76 -#define MP1_SMN_C2PMSG_76__CONTENT__SHIFT 0x0 -#define MP1_SMN_C2PMSG_76__CONTENT_MASK 0xFFFFFFFFL -//MP1_SMN_C2PMSG_77 -#define MP1_SMN_C2PMSG_77__CONTENT__SHIFT 0x0 -#define MP1_SMN_C2PMSG_77__CONTENT_MASK 0xFFFFFFFFL -//MP1_SMN_C2PMSG_78 -#define MP1_SMN_C2PMSG_78__CONTENT__SHIFT 0x0 -#define MP1_SMN_C2PMSG_78__CONTENT_MASK 0xFFFFFFFFL -//MP1_SMN_C2PMSG_79 -#define MP1_SMN_C2PMSG_79__CONTENT__SHIFT 0x0 -#define MP1_SMN_C2PMSG_79__CONTENT_MASK 0xFFFFFFFFL -//MP1_SMN_C2PMSG_80 -#define MP1_SMN_C2PMSG_80__CONTENT__SHIFT 0x0 -#define MP1_SMN_C2PMSG_80__CONTENT_MASK 0xFFFFFFFFL -//MP1_SMN_C2PMSG_81 -#define MP1_SMN_C2PMSG_81__CONTENT__SHIFT 0x0 -#define MP1_SMN_C2PMSG_81__CONTENT_MASK 0xFFFFFFFFL -//MP1_SMN_C2PMSG_82 -#define MP1_SMN_C2PMSG_82__CONTENT__SHIFT 0x0 -#define MP1_SMN_C2PMSG_82__CONTENT_MASK 0xFFFFFFFFL -//MP1_SMN_C2PMSG_83 -#define MP1_SMN_C2PMSG_83__CONTENT__SHIFT 0x0 -#define MP1_SMN_C2PMSG_83__CONTENT_MASK 0xFFFFFFFFL -//MP1_SMN_C2PMSG_84 -#define MP1_SMN_C2PMSG_84__CONTENT__SHIFT 0x0 -#define MP1_SMN_C2PMSG_84__CONTENT_MASK 0xFFFFFFFFL -//MP1_SMN_C2PMSG_85 -#define MP1_SMN_C2PMSG_85__CONTENT__SHIFT 0x0 -#define MP1_SMN_C2PMSG_85__CONTENT_MASK 0xFFFFFFFFL -//MP1_SMN_C2PMSG_86 -#define MP1_SMN_C2PMSG_86__CONTENT__SHIFT 0x0 -#define MP1_SMN_C2PMSG_86__CONTENT_MASK 0xFFFFFFFFL -//MP1_SMN_C2PMSG_87 -#define MP1_SMN_C2PMSG_87__CONTENT__SHIFT 0x0 -#define MP1_SMN_C2PMSG_87__CONTENT_MASK 0xFFFFFFFFL -//MP1_SMN_C2PMSG_88 -#define MP1_SMN_C2PMSG_88__CONTENT__SHIFT 0x0 -#define MP1_SMN_C2PMSG_88__CONTENT_MASK 0xFFFFFFFFL -//MP1_SMN_C2PMSG_89 -#define MP1_SMN_C2PMSG_89__CONTENT__SHIFT 0x0 -#define MP1_SMN_C2PMSG_89__CONTENT_MASK 0xFFFFFFFFL -//MP1_SMN_C2PMSG_90 -#define MP1_SMN_C2PMSG_90__CONTENT__SHIFT 0x0 -#define MP1_SMN_C2PMSG_90__CONTENT_MASK 0xFFFFFFFFL -//MP1_SMN_C2PMSG_91 -#define MP1_SMN_C2PMSG_91__CONTENT__SHIFT 0x0 -#define MP1_SMN_C2PMSG_91__CONTENT_MASK 0xFFFFFFFFL -//MP1_SMN_C2PMSG_92 -#define MP1_SMN_C2PMSG_92__CONTENT__SHIFT 0x0 -#define MP1_SMN_C2PMSG_92__CONTENT_MASK 0xFFFFFFFFL -//MP1_SMN_C2PMSG_93 -#define MP1_SMN_C2PMSG_93__CONTENT__SHIFT 0x0 -#define MP1_SMN_C2PMSG_93__CONTENT_MASK 0xFFFFFFFFL -//MP1_SMN_C2PMSG_94 -#define MP1_SMN_C2PMSG_94__CONTENT__SHIFT 0x0 -#define MP1_SMN_C2PMSG_94__CONTENT_MASK 0xFFFFFFFFL -//MP1_SMN_C2PMSG_95 -#define MP1_SMN_C2PMSG_95__CONTENT__SHIFT 0x0 -#define MP1_SMN_C2PMSG_95__CONTENT_MASK 0xFFFFFFFFL -//MP1_SMN_C2PMSG_96 -#define MP1_SMN_C2PMSG_96__CONTENT__SHIFT 0x0 -#define MP1_SMN_C2PMSG_96__CONTENT_MASK 0xFFFFFFFFL -//MP1_SMN_C2PMSG_97 -#define MP1_SMN_C2PMSG_97__CONTENT__SHIFT 0x0 -#define MP1_SMN_C2PMSG_97__CONTENT_MASK 0xFFFFFFFFL -//MP1_SMN_C2PMSG_98 -#define MP1_SMN_C2PMSG_98__CONTENT__SHIFT 0x0 -#define MP1_SMN_C2PMSG_98__CONTENT_MASK 0xFFFFFFFFL -//MP1_SMN_C2PMSG_99 -#define MP1_SMN_C2PMSG_99__CONTENT__SHIFT 0x0 -#define MP1_SMN_C2PMSG_99__CONTENT_MASK 0xFFFFFFFFL -//MP1_SMN_C2PMSG_100 -#define MP1_SMN_C2PMSG_100__CONTENT__SHIFT 0x0 -#define MP1_SMN_C2PMSG_100__CONTENT_MASK 0xFFFFFFFFL -//MP1_SMN_C2PMSG_101 -#define MP1_SMN_C2PMSG_101__CONTENT__SHIFT 0x0 -#define MP1_SMN_C2PMSG_101__CONTENT_MASK 0xFFFFFFFFL -//MP1_SMN_C2PMSG_102 -#define MP1_SMN_C2PMSG_102__CONTENT__SHIFT 0x0 -#define MP1_SMN_C2PMSG_102__CONTENT_MASK 0xFFFFFFFFL -//MP1_SMN_C2PMSG_103 -#define MP1_SMN_C2PMSG_103__CONTENT__SHIFT 0x0 -#define MP1_SMN_C2PMSG_103__CONTENT_MASK 0xFFFFFFFFL -//MP1_SMN_IH_CREDIT -#define MP1_SMN_IH_CREDIT__CREDIT_VALUE__SHIFT 0x0 -#define MP1_SMN_IH_CREDIT__CLIENT_ID__SHIFT 0x10 -#define MP1_SMN_IH_CREDIT__CREDIT_VALUE_MASK 0x00000003L -#define MP1_SMN_IH_CREDIT__CLIENT_ID_MASK 0x00FF0000L -//MP1_SMN_IH_SW_INT -#define MP1_SMN_IH_SW_INT__ID__SHIFT 0x0 -#define MP1_SMN_IH_SW_INT__VALID__SHIFT 0x8 -#define MP1_SMN_IH_SW_INT__ID_MASK 0x000000FFL -#define MP1_SMN_IH_SW_INT__VALID_MASK 0x00000100L -//MP1_SMN_IH_SW_INT_CTRL -#define MP1_SMN_IH_SW_INT_CTRL__INT_MASK__SHIFT 0x0 -#define MP1_SMN_IH_SW_INT_CTRL__INT_ACK__SHIFT 0x8 -#define MP1_SMN_IH_SW_INT_CTRL__INT_MASK_MASK 0x00000001L -#define MP1_SMN_IH_SW_INT_CTRL__INT_ACK_MASK 0x00000100L -//MP1_SMN_FPS_CNT -#define MP1_SMN_FPS_CNT__COUNT__SHIFT 0x0 -#define MP1_SMN_FPS_CNT__COUNT_MASK 0xFFFFFFFFL -//MP1_SMN_EXT_SCRATCH0 -#define MP1_SMN_EXT_SCRATCH0__DATA__SHIFT 0x0 -#define MP1_SMN_EXT_SCRATCH0__DATA_MASK 0xFFFFFFFFL -//MP1_SMN_EXT_SCRATCH1 -#define MP1_SMN_EXT_SCRATCH1__DATA__SHIFT 0x0 -#define MP1_SMN_EXT_SCRATCH1__DATA_MASK 0xFFFFFFFFL -//MP1_SMN_EXT_SCRATCH2 -#define MP1_SMN_EXT_SCRATCH2__DATA__SHIFT 0x0 -#define MP1_SMN_EXT_SCRATCH2__DATA_MASK 0xFFFFFFFFL -//MP1_SMN_EXT_SCRATCH3 -#define MP1_SMN_EXT_SCRATCH3__DATA__SHIFT 0x0 -#define MP1_SMN_EXT_SCRATCH3__DATA_MASK 0xFFFFFFFFL -//MP1_SMN_EXT_SCRATCH4 -#define MP1_SMN_EXT_SCRATCH4__DATA__SHIFT 0x0 -#define MP1_SMN_EXT_SCRATCH4__DATA_MASK 0xFFFFFFFFL -//MP1_SMN_EXT_SCRATCH5 -#define MP1_SMN_EXT_SCRATCH5__DATA__SHIFT 0x0 -#define MP1_SMN_EXT_SCRATCH5__DATA_MASK 0xFFFFFFFFL -//MP1_SMN_EXT_SCRATCH6 -#define MP1_SMN_EXT_SCRATCH6__DATA__SHIFT 0x0 -#define MP1_SMN_EXT_SCRATCH6__DATA_MASK 0xFFFFFFFFL -//MP1_SMN_EXT_SCRATCH7 -#define MP1_SMN_EXT_SCRATCH7__DATA__SHIFT 0x0 -#define MP1_SMN_EXT_SCRATCH7__DATA_MASK 0xFFFFFFFFL - - -#endif diff --git a/drivers/gpu/drm/amd/pm/inc/smu_v13_0.h b/drivers/gpu/drm/amd/pm/inc/smu_v13_0.h index 6119a36b2cba0f..3fea2430dec023 100644 --- a/drivers/gpu/drm/amd/pm/inc/smu_v13_0.h +++ b/drivers/gpu/drm/amd/pm/inc/smu_v13_0.h @@ -26,6 +26,7 @@ #include "amdgpu_smu.h" #define SMU13_DRIVER_IF_VERSION_INV 0xFFFFFFFF +#define SMU13_DRIVER_IF_VERSION_YELLOW_CARP 0x03 #define SMU13_DRIVER_IF_VERSION_ALDE 0x07 /* MP Apertures */ diff --git a/drivers/gpu/drm/amd/pm/inc/smu_v13_0_1.h b/drivers/gpu/drm/amd/pm/inc/smu_v13_0_1.h deleted file mode 100644 index b6c976a4d5788f..00000000000000 --- a/drivers/gpu/drm/amd/pm/inc/smu_v13_0_1.h +++ /dev/null @@ -1,57 +0,0 @@ -/* - * Copyright 2020 Advanced Micro Devices, Inc. - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the "Software"), - * to deal in the Software without restriction, including without limitation - * the rights to use, copy, modify, merge, publish, distribute, sublicense, - * and/or sell copies of the Software, and to permit persons to whom the - * Software is furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in - * all copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL - * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR - * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, - * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR - * OTHER DEALINGS IN THE SOFTWARE. - * - */ -#ifndef __SMU_V13_0_1_H__ -#define __SMU_V13_0_1_H__ - -#include "amdgpu_smu.h" - -#define SMU13_0_1_DRIVER_IF_VERSION_INV 0xFFFFFFFF -#define SMU13_0_1_DRIVER_IF_VERSION_YELLOW_CARP 0x3 - -/* MP Apertures */ -#define MP0_Public 0x03800000 -#define MP0_SRAM 0x03900000 -#define MP1_Public 0x03b00000 -#define MP1_SRAM 0x03c00004 - -/* address block */ -#define smnMP1_FIRMWARE_FLAGS 0x3010024 - - -#if defined(SWSMU_CODE_LAYER_L2) || defined(SWSMU_CODE_LAYER_L3) - -int smu_v13_0_1_check_fw_status(struct smu_context *smu); - -int smu_v13_0_1_check_fw_version(struct smu_context *smu); - -int smu_v13_0_1_fini_smc_tables(struct smu_context *smu); - -int smu_v13_0_1_get_vbios_bootup_values(struct smu_context *smu); - -int smu_v13_0_1_set_default_dpm_tables(struct smu_context *smu); - -int smu_v13_0_1_set_driver_table_location(struct smu_context *smu); - -int smu_v13_0_1_gfx_off_control(struct smu_context *smu, bool enable); -#endif -#endif diff --git a/drivers/gpu/drm/amd/pm/swsmu/smu13/Makefile b/drivers/gpu/drm/amd/pm/swsmu/smu13/Makefile index 9b3a8503f5cd42..d4c4c495762c92 100644 --- a/drivers/gpu/drm/amd/pm/swsmu/smu13/Makefile +++ b/drivers/gpu/drm/amd/pm/swsmu/smu13/Makefile @@ -23,7 +23,7 @@ # Makefile for the 'smu manager' sub-component of powerplay. # It provides the smu management services for the driver. -SMU13_MGR = smu_v13_0.o aldebaran_ppt.o smu_v13_0_1.o yellow_carp_ppt.o +SMU13_MGR = smu_v13_0.o aldebaran_ppt.o yellow_carp_ppt.o AMD_SWSMU_SMU13MGR = $(addprefix $(AMD_SWSMU_PATH)/smu13/,$(SMU13_MGR)) diff --git a/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0.c b/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0.c index a3dc7194aaf88c..a421ba85bd6dce 100644 --- a/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0.c +++ b/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0.c @@ -210,6 +210,9 @@ int smu_v13_0_check_fw_version(struct smu_context *smu) case CHIP_ALDEBARAN: smu->smc_driver_if_version = SMU13_DRIVER_IF_VERSION_ALDE; break; + case CHIP_YELLOW_CARP: + smu->smc_driver_if_version = SMU13_DRIVER_IF_VERSION_YELLOW_CARP; + break; default: dev_err(smu->adev->dev, "smu unsupported asic type:%d.\n", smu->adev->asic_type); smu->smc_driver_if_version = SMU13_DRIVER_IF_VERSION_INV; @@ -694,6 +697,27 @@ int smu_v13_0_set_allowed_mask(struct smu_context *smu) return ret; } +int smu_v13_0_gfx_off_control(struct smu_context *smu, bool enable) +{ + int ret = 0; + struct amdgpu_device *adev = smu->adev; + + switch (adev->asic_type) { + case CHIP_YELLOW_CARP: + if (!(adev->pm.pp_feature & PP_GFXOFF_MASK)) + return 0; + if (enable) + ret = smu_cmn_send_smc_msg(smu, SMU_MSG_AllowGfxOff, NULL); + else + ret = smu_cmn_send_smc_msg(smu, SMU_MSG_DisallowGfxOff, NULL); + break; + default: + break; + } + + return ret; +} + int smu_v13_0_system_features_control(struct smu_context *smu, bool en) { diff --git a/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_1.c b/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_1.c deleted file mode 100644 index 61917b49f2bfde..00000000000000 --- a/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_1.c +++ /dev/null @@ -1,311 +0,0 @@ -/* - * Copyright 2020 Advanced Micro Devices, Inc. - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the "Software"), - * to deal in the Software without restriction, including without limitation - * the rights to use, copy, modify, merge, publish, distribute, sublicense, - * and/or sell copies of the Software, and to permit persons to whom the - * Software is furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in - * all copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL - * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR - * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, - * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR - * OTHER DEALINGS IN THE SOFTWARE. - */ - -//#include - -#define SWSMU_CODE_LAYER_L3 - -#include "amdgpu.h" -#include "amdgpu_smu.h" -#include "smu_v13_0_1.h" -#include "soc15_common.h" -#include "smu_cmn.h" -#include "atomfirmware.h" -#include "amdgpu_atomfirmware.h" -#include "amdgpu_atombios.h" -#include "atom.h" - -#include "asic_reg/mp/mp_13_0_1_offset.h" -#include "asic_reg/mp/mp_13_0_1_sh_mask.h" - -/* - * DO NOT use these for err/warn/info/debug messages. - * Use dev_err, dev_warn, dev_info and dev_dbg instead. - * They are more MGPU friendly. - */ -#undef pr_err -#undef pr_warn -#undef pr_info -#undef pr_debug - -int smu_v13_0_1_check_fw_status(struct smu_context *smu) -{ - struct amdgpu_device *adev = smu->adev; - uint32_t mp1_fw_flags; - - mp1_fw_flags = RREG32_PCIE(MP1_Public | - (smnMP1_FIRMWARE_FLAGS & 0xffffffff)); - - if ((mp1_fw_flags & MP1_FIRMWARE_FLAGS__INTERRUPTS_ENABLED_MASK) >> - MP1_FIRMWARE_FLAGS__INTERRUPTS_ENABLED__SHIFT) - return 0; - - return -EIO; -} - -int smu_v13_0_1_check_fw_version(struct smu_context *smu) -{ - uint32_t if_version = 0xff, smu_version = 0xff; - uint16_t smu_major; - uint8_t smu_minor, smu_debug; - int ret = 0; - - ret = smu_cmn_get_smc_version(smu, &if_version, &smu_version); - if (ret) - return ret; - - smu_major = (smu_version >> 16) & 0xffff; - smu_minor = (smu_version >> 8) & 0xff; - smu_debug = (smu_version >> 0) & 0xff; - - switch (smu->adev->asic_type) { - case CHIP_YELLOW_CARP: - smu->smc_driver_if_version = SMU13_0_1_DRIVER_IF_VERSION_YELLOW_CARP; - break; - - default: - dev_err(smu->adev->dev, "smu unsupported asic type:%d.\n", smu->adev->asic_type); - smu->smc_driver_if_version = SMU13_0_1_DRIVER_IF_VERSION_INV; - break; - } - - dev_info(smu->adev->dev, "smu fw reported version = 0x%08x (%d.%d.%d)\n", - smu_version, smu_major, smu_minor, smu_debug); - - /* - * 1. if_version mismatch is not critical as our fw is designed - * to be backward compatible. - * 2. New fw usually brings some optimizations. But that's visible - * only on the paired driver. - * Considering above, we just leave user a warning message instead - * of halt driver loading. - */ - if (if_version != smu->smc_driver_if_version) { - dev_info(smu->adev->dev, "smu driver if version = 0x%08x, smu fw if version = 0x%08x, " - "smu fw version = 0x%08x (%d.%d.%d)\n", - smu->smc_driver_if_version, if_version, - smu_version, smu_major, smu_minor, smu_debug); - dev_warn(smu->adev->dev, "SMU driver if version not matched\n"); - } - - return ret; -} - -int smu_v13_0_1_fini_smc_tables(struct smu_context *smu) -{ - struct smu_table_context *smu_table = &smu->smu_table; - - kfree(smu_table->clocks_table); - smu_table->clocks_table = NULL; - - kfree(smu_table->metrics_table); - smu_table->metrics_table = NULL; - - kfree(smu_table->watermarks_table); - smu_table->watermarks_table = NULL; - - return 0; -} - -static int smu_v13_0_1_atom_get_smu_clockinfo(struct amdgpu_device *adev, - uint8_t clk_id, - uint8_t syspll_id, - uint32_t *clk_freq) -{ - struct atom_get_smu_clock_info_parameters_v3_1 input = {0}; - struct atom_get_smu_clock_info_output_parameters_v3_1 *output; - int ret, index; - - input.clk_id = clk_id; - input.syspll_id = syspll_id; - input.command = GET_SMU_CLOCK_INFO_V3_1_GET_CLOCK_FREQ; - index = get_index_into_master_table(atom_master_list_of_command_functions_v2_1, - getsmuclockinfo); - - ret = amdgpu_atom_execute_table(adev->mode_info.atom_context, index, - (uint32_t *)&input); - if (ret) - return -EINVAL; - - output = (struct atom_get_smu_clock_info_output_parameters_v3_1 *)&input; - *clk_freq = le32_to_cpu(output->atom_smu_outputclkfreq.smu_clock_freq_hz) / 10000; - - return 0; -} - -int smu_v13_0_1_get_vbios_bootup_values(struct smu_context *smu) -{ - int ret, index; - uint16_t size; - uint8_t frev, crev; - struct atom_common_table_header *header; - struct atom_firmware_info_v3_4 *v_3_4; - struct atom_firmware_info_v3_3 *v_3_3; - struct atom_firmware_info_v3_1 *v_3_1; - - index = get_index_into_master_table(atom_master_list_of_data_tables_v2_1, - firmwareinfo); - - ret = amdgpu_atombios_get_data_table(smu->adev, index, &size, &frev, &crev, - (uint8_t **)&header); - if (ret) - return ret; - - if (header->format_revision != 3) { - dev_err(smu->adev->dev, "unknown atom_firmware_info version! for smu13\n"); - return -EINVAL; - } - - switch (header->content_revision) { - case 0: - case 1: - case 2: - v_3_1 = (struct atom_firmware_info_v3_1 *)header; - smu->smu_table.boot_values.revision = v_3_1->firmware_revision; - smu->smu_table.boot_values.gfxclk = v_3_1->bootup_sclk_in10khz; - smu->smu_table.boot_values.uclk = v_3_1->bootup_mclk_in10khz; - smu->smu_table.boot_values.socclk = 0; - smu->smu_table.boot_values.dcefclk = 0; - smu->smu_table.boot_values.vddc = v_3_1->bootup_vddc_mv; - smu->smu_table.boot_values.vddci = v_3_1->bootup_vddci_mv; - smu->smu_table.boot_values.mvddc = v_3_1->bootup_mvddc_mv; - smu->smu_table.boot_values.vdd_gfx = v_3_1->bootup_vddgfx_mv; - smu->smu_table.boot_values.cooling_id = v_3_1->coolingsolution_id; - break; - case 3: - v_3_3 = (struct atom_firmware_info_v3_3 *)header; - smu->smu_table.boot_values.revision = v_3_3->firmware_revision; - smu->smu_table.boot_values.gfxclk = v_3_3->bootup_sclk_in10khz; - smu->smu_table.boot_values.uclk = v_3_3->bootup_mclk_in10khz; - smu->smu_table.boot_values.socclk = 0; - smu->smu_table.boot_values.dcefclk = 0; - smu->smu_table.boot_values.vddc = v_3_3->bootup_vddc_mv; - smu->smu_table.boot_values.vddci = v_3_3->bootup_vddci_mv; - smu->smu_table.boot_values.mvddc = v_3_3->bootup_mvddc_mv; - smu->smu_table.boot_values.vdd_gfx = v_3_3->bootup_vddgfx_mv; - smu->smu_table.boot_values.cooling_id = v_3_3->coolingsolution_id; - break; - case 4: - default: - v_3_4 = (struct atom_firmware_info_v3_4 *)header; - smu->smu_table.boot_values.revision = v_3_4->firmware_revision; - smu->smu_table.boot_values.gfxclk = v_3_4->bootup_sclk_in10khz; - smu->smu_table.boot_values.uclk = v_3_4->bootup_mclk_in10khz; - smu->smu_table.boot_values.socclk = 0; - smu->smu_table.boot_values.dcefclk = 0; - smu->smu_table.boot_values.vddc = v_3_4->bootup_vddc_mv; - smu->smu_table.boot_values.vddci = v_3_4->bootup_vddci_mv; - smu->smu_table.boot_values.mvddc = v_3_4->bootup_mvddc_mv; - smu->smu_table.boot_values.vdd_gfx = v_3_4->bootup_vddgfx_mv; - smu->smu_table.boot_values.cooling_id = v_3_4->coolingsolution_id; - break; - } - - smu->smu_table.boot_values.format_revision = header->format_revision; - smu->smu_table.boot_values.content_revision = header->content_revision; - - smu_v13_0_1_atom_get_smu_clockinfo(smu->adev, - (uint8_t)SMU11_SYSPLL0_SOCCLK_ID, - (uint8_t)0, - &smu->smu_table.boot_values.socclk); - - smu_v13_0_1_atom_get_smu_clockinfo(smu->adev, - (uint8_t)SMU11_SYSPLL0_DCEFCLK_ID, - (uint8_t)0, - &smu->smu_table.boot_values.dcefclk); - - smu_v13_0_1_atom_get_smu_clockinfo(smu->adev, - (uint8_t)SMU11_SYSPLL0_ECLK_ID, - (uint8_t)0, - &smu->smu_table.boot_values.eclk); - - smu_v13_0_1_atom_get_smu_clockinfo(smu->adev, - (uint8_t)SMU11_SYSPLL0_VCLK_ID, - (uint8_t)0, - &smu->smu_table.boot_values.vclk); - - smu_v13_0_1_atom_get_smu_clockinfo(smu->adev, - (uint8_t)SMU11_SYSPLL0_DCLK_ID, - (uint8_t)0, - &smu->smu_table.boot_values.dclk); - - if ((smu->smu_table.boot_values.format_revision == 3) && - (smu->smu_table.boot_values.content_revision >= 2)) - smu_v13_0_1_atom_get_smu_clockinfo(smu->adev, - (uint8_t)SMU11_SYSPLL1_0_FCLK_ID, - (uint8_t)SMU11_SYSPLL1_2_ID, - &smu->smu_table.boot_values.fclk); - - return 0; -} - -int smu_v13_0_1_set_default_dpm_tables(struct smu_context *smu) -{ - struct smu_table_context *smu_table = &smu->smu_table; - - return smu_cmn_update_table(smu, SMU_TABLE_DPMCLOCKS, 0, smu_table->clocks_table, false); -} - -int smu_v13_0_1_set_driver_table_location(struct smu_context *smu) -{ - struct smu_table *driver_table = &smu->smu_table.driver_table; - int ret = 0; - - if (!driver_table->mc_address) - return 0; - - ret = smu_cmn_send_smc_msg_with_param(smu, - SMU_MSG_SetDriverDramAddrHigh, - upper_32_bits(driver_table->mc_address), - NULL); - - if (ret) - return ret; - - ret = smu_cmn_send_smc_msg_with_param(smu, - SMU_MSG_SetDriverDramAddrLow, - lower_32_bits(driver_table->mc_address), - NULL); - - return ret; -} - -int smu_v13_0_1_gfx_off_control(struct smu_context *smu, bool enable) -{ - int ret = 0; - struct amdgpu_device *adev = smu->adev; - - switch (adev->asic_type) { - case CHIP_YELLOW_CARP: - if (!(adev->pm.pp_feature & PP_GFXOFF_MASK)) - return 0; - if (enable) - ret = smu_cmn_send_smc_msg(smu, SMU_MSG_AllowGfxOff, NULL); - else - ret = smu_cmn_send_smc_msg(smu, SMU_MSG_DisallowGfxOff, NULL); - break; - default: - break; - } - - return ret; -} diff --git a/drivers/gpu/drm/amd/pm/swsmu/smu13/yellow_carp_ppt.c b/drivers/gpu/drm/amd/pm/swsmu/smu13/yellow_carp_ppt.c index 18a1ffdca2272e..de92f713a91185 100644 --- a/drivers/gpu/drm/amd/pm/swsmu/smu13/yellow_carp_ppt.c +++ b/drivers/gpu/drm/amd/pm/swsmu/smu13/yellow_carp_ppt.c @@ -25,7 +25,7 @@ #include "amdgpu.h" #include "amdgpu_smu.h" -#include "smu_v13_0_1.h" +#include "smu_v13_0.h" #include "smu13_driver_if_yellow_carp.h" #include "yellow_carp_ppt.h" #include "smu_v13_0_1_ppsmc.h" @@ -186,6 +186,22 @@ static int yellow_carp_init_smc_tables(struct smu_context *smu) return -ENOMEM; } +static int yellow_carp_fini_smc_tables(struct smu_context *smu) +{ + struct smu_table_context *smu_table = &smu->smu_table; + + kfree(smu_table->clocks_table); + smu_table->clocks_table = NULL; + + kfree(smu_table->metrics_table); + smu_table->metrics_table = NULL; + + kfree(smu_table->watermarks_table); + smu_table->watermarks_table = NULL; + + return 0; +} + static int yellow_carp_system_features_control(struct smu_context *smu, bool en) { struct smu_feature *feature = &smu->smu_feature; @@ -659,6 +675,13 @@ static ssize_t yellow_carp_get_gpu_metrics(struct smu_context *smu, return sizeof(struct gpu_metrics_v2_1); } +static int yellow_carp_set_default_dpm_tables(struct smu_context *smu) +{ + struct smu_table_context *smu_table = &smu->smu_table; + + return smu_cmn_update_table(smu, SMU_TABLE_DPMCLOCKS, 0, smu_table->clocks_table, false); +} + static int yellow_carp_od_edit_dpm_table(struct smu_context *smu, enum PP_OD_DPM_TABLE_COMMAND type, long input[], uint32_t size) { @@ -1203,17 +1226,17 @@ static int yellow_carp_set_fine_grain_gfx_freq_parameters(struct smu_context *sm } static const struct pptable_funcs yellow_carp_ppt_funcs = { - .check_fw_status = smu_v13_0_1_check_fw_status, - .check_fw_version = smu_v13_0_1_check_fw_version, + .check_fw_status = smu_v13_0_check_fw_status, + .check_fw_version = smu_v13_0_check_fw_version, .init_smc_tables = yellow_carp_init_smc_tables, - .fini_smc_tables = smu_v13_0_1_fini_smc_tables, - .get_vbios_bootup_values = smu_v13_0_1_get_vbios_bootup_values, + .fini_smc_tables = yellow_carp_fini_smc_tables, + .get_vbios_bootup_values = smu_v13_0_get_vbios_bootup_values, .system_features_control = yellow_carp_system_features_control, .send_smc_msg_with_param = smu_cmn_send_smc_msg_with_param, .send_smc_msg = smu_cmn_send_smc_msg, .dpm_set_vcn_enable = yellow_carp_dpm_set_vcn_enable, .dpm_set_jpeg_enable = yellow_carp_dpm_set_jpeg_enable, - .set_default_dpm_table = smu_v13_0_1_set_default_dpm_tables, + .set_default_dpm_table = yellow_carp_set_default_dpm_tables, .read_sensor = yellow_carp_read_sensor, .is_dpm_running = yellow_carp_is_dpm_running, .set_watermarks_table = yellow_carp_set_watermarks_table, @@ -1222,8 +1245,8 @@ static const struct pptable_funcs yellow_carp_ppt_funcs = { .get_gpu_metrics = yellow_carp_get_gpu_metrics, .get_enabled_mask = smu_cmn_get_enabled_32_bits_mask, .get_pp_feature_mask = smu_cmn_get_pp_feature_mask, - .set_driver_table_location = smu_v13_0_1_set_driver_table_location, - .gfx_off_control = smu_v13_0_1_gfx_off_control, + .set_driver_table_location = smu_v13_0_set_driver_table_location, + .gfx_off_control = smu_v13_0_gfx_off_control, .post_init = yellow_carp_post_smu_init, .mode2_reset = yellow_carp_mode2_reset, .get_dpm_ultimate_freq = yellow_carp_get_dpm_ultimate_freq, From 834b8245d6bbd57900ca6c79d7f9e0279a8d51c2 Mon Sep 17 00:00:00 2001 From: Xiaomeng Hou Date: Wed, 7 Jul 2021 16:50:01 +0800 Subject: [PATCH 281/714] drm/amd/display: update header file name Update the register header file name. Signed-off-by: Xiaomeng Hou Reviewed-by: Aaron Liu Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/display/dc/clk_mgr/dcn31/dcn31_smu.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn31/dcn31_smu.c b/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn31/dcn31_smu.c index 66db5e988bc1bd..dad4a4c18bcf37 100644 --- a/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn31/dcn31_smu.c +++ b/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn31/dcn31_smu.c @@ -31,8 +31,8 @@ #include "dcn31_smu.h" #include "yellow_carp_offset.h" -#include "mp/mp_13_0_1_offset.h" -#include "mp/mp_13_0_1_sh_mask.h" +#include "mp/mp_13_0_2_offset.h" +#include "mp/mp_13_0_2_sh_mask.h" #define REG(reg_name) \ (MP0_BASE.instance[0].segment[reg ## reg_name ## _BASE_IDX] + reg ## reg_name) From fa8f311e9e3aa1d41f31f8b28f975c65f0b25c03 Mon Sep 17 00:00:00 2001 From: Emily Deng Date: Tue, 12 May 2020 18:27:21 +0800 Subject: [PATCH 282/714] drm/amdgpu: Correct the irq numbers for virtual crtc The irq number should be decided by num_crtc, and the num_crtc could change by parameter. Signed-off-by: Emily Deng Reviewed by: Monk Liu Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/dce_virtual.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/amd/amdgpu/dce_virtual.c b/drivers/gpu/drm/amd/amdgpu/dce_virtual.c index 33324427b555e6..7e0d8c092c7ee6 100644 --- a/drivers/gpu/drm/amd/amdgpu/dce_virtual.c +++ b/drivers/gpu/drm/amd/amdgpu/dce_virtual.c @@ -766,7 +766,7 @@ static const struct amdgpu_irq_src_funcs dce_virtual_crtc_irq_funcs = { static void dce_virtual_set_irq_funcs(struct amdgpu_device *adev) { - adev->crtc_irq.num_types = AMDGPU_CRTC_IRQ_VBLANK6 + 1; + adev->crtc_irq.num_types = adev->mode_info.num_crtc; adev->crtc_irq.funcs = &dce_virtual_crtc_irq_funcs; } From 5017bf8214d929f6ff45c46ec21cd3c1bfc03dad Mon Sep 17 00:00:00 2001 From: Philip Yang Date: Wed, 7 Jul 2021 12:42:34 -0400 Subject: [PATCH 283/714] drm/amdkfd: handle fault counters on invalid address prange is NULL if vm fault retry on invalid address, for this case, can not use prange to get pdd, use adev to get gpuidx and then get pdd instead, then increase pdd vm fault counter. Signed-off-by: Philip Yang Reviewed-by: Felix Kuehling Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdkfd/kfd_svm.c | 30 +++++++++++++++++----------- 1 file changed, 18 insertions(+), 12 deletions(-) diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_svm.c b/drivers/gpu/drm/amd/amdkfd/kfd_svm.c index 9a71d8919bd630..c7b364e4a287fd 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_svm.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_svm.c @@ -2375,21 +2375,27 @@ static bool svm_range_skip_recover(struct svm_range *prange) static void svm_range_count_fault(struct amdgpu_device *adev, struct kfd_process *p, - struct svm_range *prange, int32_t gpuidx) + int32_t gpuidx) { struct kfd_process_device *pdd; - if (gpuidx == MAX_GPU_INSTANCE) - /* fault is on different page of same range - * or fault is skipped to recover later - */ - pdd = svm_range_get_pdd_by_adev(prange, adev); - else - /* fault recovered - * or fault cannot recover because GPU no access on the range - */ - pdd = kfd_process_device_from_gpuidx(p, gpuidx); + /* fault is on different page of same range + * or fault is skipped to recover later + * or fault is on invalid virtual address + */ + if (gpuidx == MAX_GPU_INSTANCE) { + uint32_t gpuid; + int r; + r = kfd_process_gpuid_from_kgd(p, adev, &gpuid, &gpuidx); + if (r < 0) + return; + } + + /* fault is recovered + * or fault cannot recover because GPU no access on the range + */ + pdd = kfd_process_device_from_gpuidx(p, gpuidx); if (pdd) WRITE_ONCE(pdd->faults, pdd->faults + 1); } @@ -2525,7 +2531,7 @@ svm_range_restore_pages(struct amdgpu_device *adev, unsigned int pasid, mutex_unlock(&svms->lock); mmap_read_unlock(mm); - svm_range_count_fault(adev, p, prange, gpuidx); + svm_range_count_fault(adev, p, gpuidx); mmput(mm); out: From c010efb7f0bc0c3cb2cd26b000f71d4bd0c427cd Mon Sep 17 00:00:00 2001 From: Zhan Liu Date: Thu, 8 Jul 2021 00:51:52 -0400 Subject: [PATCH 284/714] drm/amdgpu/display - only update eDP's backlight level when necessary [Why] The original logic is to update eDP's backlight level on every amdgpu dm atomic commit, which causes excessive DMUB write. As a result, when playing game or moving window around, DMUB timeout and system lagging are observed. [How] We only need to update eDP's backlight level when current level doesn't match requested level. Signed-off-by: Zhan Liu Reviewed-by: Alex Deucher Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c index 01e1062dc2353a..d3a2a5ff57e951 100644 --- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c +++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c @@ -9191,7 +9191,7 @@ static void amdgpu_dm_atomic_commit_tail(struct drm_atomic_state *state) #if defined(CONFIG_BACKLIGHT_CLASS_DEVICE) || \ defined(CONFIG_BACKLIGHT_CLASS_DEVICE_MODULE) /* restore the backlight level */ - if (dm->backlight_dev) + if (dm->backlight_dev && (amdgpu_dm_backlight_get_level(dm) != dm->brightness[0])) amdgpu_dm_backlight_set_level(dm, dm->brightness[0]); #endif /* From 99e7d65cccc8f54581eb961a50da676b79c966d0 Mon Sep 17 00:00:00 2001 From: Felix Kuehling Date: Wed, 7 Jul 2021 17:55:55 -0400 Subject: [PATCH 285/714] drm/amdkfd: Allow CPU access for all VRAM BOs The thunk needs to mmap all BOs for CPU access to allow the debugger to access them. Invisible ones are mapped with PROT_NONE. Fixes: 71df0368e9b6 ("drm/amdgpu: Implement mmap as GEM object function") Signed-off-by: Felix Kuehling Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c index 3b8e1ee8c475b7..eb82739b3080c5 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c @@ -1393,8 +1393,7 @@ int amdgpu_amdkfd_gpuvm_alloc_memory_of_gpu( domain = alloc_domain = AMDGPU_GEM_DOMAIN_VRAM; alloc_flags = AMDGPU_GEM_CREATE_VRAM_WIPE_ON_RELEASE; alloc_flags |= (flags & KFD_IOC_ALLOC_MEM_FLAGS_PUBLIC) ? - AMDGPU_GEM_CREATE_CPU_ACCESS_REQUIRED : - AMDGPU_GEM_CREATE_NO_CPU_ACCESS; + AMDGPU_GEM_CREATE_CPU_ACCESS_REQUIRED : 0; } else if (flags & KFD_IOC_ALLOC_MEM_FLAGS_GTT) { domain = alloc_domain = AMDGPU_GEM_DOMAIN_GTT; alloc_flags = 0; From 9be26ddf88de5621f071a1e4f7725ce1015b5036 Mon Sep 17 00:00:00 2001 From: "Emily.Deng" Date: Thu, 1 Oct 2020 12:41:50 +0800 Subject: [PATCH 286/714] drm/amdgpu: Restore msix after FLR After FLR, the msix will be cleared, so need to re-enable it. Signed-off-by: Peng Ju Zhou Signed-off-by: Emily.Deng Reviewed-by: Lijo Lazar Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu_irq.c | 18 ++++++++++++++++++ 1 file changed, 18 insertions(+) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_irq.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_irq.c index 32ce0e679dc785..83af307e97cdf2 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_irq.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_irq.c @@ -278,6 +278,21 @@ static bool amdgpu_msi_ok(struct amdgpu_device *adev) return true; } +static void amdgpu_restore_msix(struct amdgpu_device *adev) +{ + u16 ctrl; + + pci_read_config_word(adev->pdev, adev->pdev->msix_cap + PCI_MSIX_FLAGS, &ctrl); + if (!(ctrl & PCI_MSIX_FLAGS_ENABLE)) + return; + + /* VF FLR */ + ctrl &= ~PCI_MSIX_FLAGS_ENABLE; + pci_write_config_word(adev->pdev, adev->pdev->msix_cap + PCI_MSIX_FLAGS, ctrl); + ctrl |= PCI_MSIX_FLAGS_ENABLE; + pci_write_config_word(adev->pdev, adev->pdev->msix_cap + PCI_MSIX_FLAGS, ctrl); +} + /** * amdgpu_irq_init - initialize interrupt handling * @@ -569,6 +584,9 @@ void amdgpu_irq_gpu_reset_resume_helper(struct amdgpu_device *adev) { int i, j, k; + if (amdgpu_sriov_vf(adev)) + amdgpu_restore_msix(adev); + for (i = 0; i < AMDGPU_IRQ_CLIENTID_MAX; ++i) { if (!adev->irq.client[i].sources) continue; From 06055d2e1ce83ba02b0073ff6f4fdb883fdb05c0 Mon Sep 17 00:00:00 2001 From: Chengming Gui Date: Fri, 9 Jul 2021 16:23:48 +0800 Subject: [PATCH 287/714] drm/amd/pm: Fix BACO state setting for Beige_Goby Correct BACO state setting for Beige_Goby Signed-off-by: Chengming Gui Reviewed-by: Jiansong Chen Reviewed-by: Guchun Chen Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/pm/swsmu/smu11/smu_v11_0.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/gpu/drm/amd/pm/swsmu/smu11/smu_v11_0.c b/drivers/gpu/drm/amd/pm/swsmu/smu11/smu_v11_0.c index 388c5cb5c647ef..0a5d46ac9ccd8b 100644 --- a/drivers/gpu/drm/amd/pm/swsmu/smu11/smu_v11_0.c +++ b/drivers/gpu/drm/amd/pm/swsmu/smu11/smu_v11_0.c @@ -1528,6 +1528,7 @@ int smu_v11_0_baco_set_state(struct smu_context *smu, enum smu_baco_state state) case CHIP_SIENNA_CICHLID: case CHIP_NAVY_FLOUNDER: case CHIP_DIMGREY_CAVEFISH: + case CHIP_BEIGE_GOBY: if (amdgpu_runtime_pm == 2) ret = smu_cmn_send_smc_msg_with_param(smu, SMU_MSG_EnterBaco, From f5cc09acece432d536c2e62c791b52760a25fa5c Mon Sep 17 00:00:00 2001 From: Eric Huang Date: Fri, 9 Jul 2021 14:51:34 -0400 Subject: [PATCH 288/714] Revert "drm/amdkfd: Add memory sync before TLB flush on unmap" This reverts commit 3be4dca197010d1328df8b11febc8c40491be498. Reason for revert: it causes regressions on several Asics. Signed-off-by: Eric Huang Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdkfd/kfd_chardev.c | 23 +++-------------------- 1 file changed, 3 insertions(+), 20 deletions(-) diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c b/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c index 67541c30327a69..a10e3823b66045 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c @@ -1566,29 +1566,13 @@ static int kfd_ioctl_unmap_memory_from_gpu(struct file *filep, i, args->n_devices); goto unmap_memory_from_gpu_failed; } - args->n_success = i+1; - } - mutex_unlock(&p->mutex); - - err = amdgpu_amdkfd_gpuvm_sync_memory(dev->kgd, (struct kgd_mem *) mem, true); - if (err) { - pr_debug("Sync memory failed, wait interrupted by user signal\n"); - goto sync_memory_failed; - } - - /* Flush TLBs after waiting for the page table updates to complete */ - for (i = 0; i < args->n_devices; i++) { - peer = kfd_device_by_id(devices_arr[i]); - if (WARN_ON_ONCE(!peer)) - continue; - peer_pdd = kfd_get_process_device_data(peer, p); - if (WARN_ON_ONCE(!peer_pdd)) - continue; kfd_flush_tlb(peer_pdd, TLB_FLUSH_HEAVYWEIGHT); + args->n_success = i+1; } - kfree(devices_arr); + mutex_unlock(&p->mutex); + return 0; bind_process_to_device_failed: @@ -1596,7 +1580,6 @@ static int kfd_ioctl_unmap_memory_from_gpu(struct file *filep, unmap_memory_from_gpu_failed: mutex_unlock(&p->mutex); copy_from_user_failed: -sync_memory_failed: kfree(devices_arr); return err; } From 22762e376612be51a47c96d977bdc103c7a4436c Mon Sep 17 00:00:00 2001 From: Eric Huang Date: Fri, 9 Jul 2021 14:55:03 -0400 Subject: [PATCH 289/714] Revert "drm/amdgpu: Fix warning of Function parameter or member not described" This reverts commit 7a68d188d1c4a9d947369acaa19040a58baaaeda. Reason for revert: the related commit is reverted. Signed-off-by: Eric Huang Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c | 1 - 1 file changed, 1 deletion(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c index 79cfa2d68487ea..6b43ebab68a11c 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c @@ -1816,7 +1816,6 @@ void amdgpu_vm_get_memory(struct amdgpu_vm *vm, uint64_t *vram_mem, * @adev: amdgpu_device pointer * @bo_va: requested BO and VM object * @clear: if true clear the entries - * @table_freed: return true if page table is freed * * Fill in the page table entries for @bo_va. * From c37387c354c81eaf17bdc463727dcad1c951c0db Mon Sep 17 00:00:00 2001 From: Eric Huang Date: Fri, 9 Jul 2021 14:57:11 -0400 Subject: [PATCH 290/714] Revert "drm/amdkfd: Make TLB flush conditional on mapping" This reverts commit 31f33243788dcbae8bd2819ed83923a73f7dfd30. Reason for revert: it causes regressions on several Asics. Signed-off-by: Eric Huang Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h | 2 +- .../gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c | 19 +++++++-------- drivers/gpu/drm/amd/amdkfd/kfd_chardev.c | 23 ++++++++----------- drivers/gpu/drm/amd/amdkfd/kfd_process.c | 3 +-- 4 files changed, 20 insertions(+), 27 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h index db16b3e83694e6..cf62f43a03da1d 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h @@ -269,7 +269,7 @@ int amdgpu_amdkfd_gpuvm_free_memory_of_gpu( struct kgd_dev *kgd, struct kgd_mem *mem, void *drm_priv, uint64_t *size); int amdgpu_amdkfd_gpuvm_map_memory_to_gpu( - struct kgd_dev *kgd, struct kgd_mem *mem, void *drm_priv, bool *table_freed); + struct kgd_dev *kgd, struct kgd_mem *mem, void *drm_priv); int amdgpu_amdkfd_gpuvm_unmap_memory_from_gpu( struct kgd_dev *kgd, struct kgd_mem *mem, void *drm_priv); int amdgpu_amdkfd_gpuvm_sync_memory( diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c index eb82739b3080c5..4fb15750b9bb4c 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c @@ -1057,8 +1057,7 @@ static void unmap_bo_from_gpuvm(struct kgd_mem *mem, static int update_gpuvm_pte(struct kgd_mem *mem, struct kfd_mem_attachment *entry, - struct amdgpu_sync *sync, - bool *table_freed) + struct amdgpu_sync *sync) { struct amdgpu_bo_va *bo_va = entry->bo_va; struct amdgpu_device *adev = entry->adev; @@ -1069,7 +1068,7 @@ static int update_gpuvm_pte(struct kgd_mem *mem, return ret; /* Update the page tables */ - ret = amdgpu_vm_bo_update(adev, bo_va, false, table_freed); + ret = amdgpu_vm_bo_update(adev, bo_va, false); if (ret) { pr_err("amdgpu_vm_bo_update failed\n"); return ret; @@ -1081,8 +1080,7 @@ static int update_gpuvm_pte(struct kgd_mem *mem, static int map_bo_to_gpuvm(struct kgd_mem *mem, struct kfd_mem_attachment *entry, struct amdgpu_sync *sync, - bool no_update_pte, - bool *table_freed) + bool no_update_pte) { int ret; @@ -1099,7 +1097,7 @@ static int map_bo_to_gpuvm(struct kgd_mem *mem, if (no_update_pte) return 0; - ret = update_gpuvm_pte(mem, entry, sync, table_freed); + ret = update_gpuvm_pte(mem, entry, sync); if (ret) { pr_err("update_gpuvm_pte() failed\n"); goto update_gpuvm_pte_failed; @@ -1596,8 +1594,7 @@ int amdgpu_amdkfd_gpuvm_free_memory_of_gpu( } int amdgpu_amdkfd_gpuvm_map_memory_to_gpu( - struct kgd_dev *kgd, struct kgd_mem *mem, - void *drm_priv, bool *table_freed) + struct kgd_dev *kgd, struct kgd_mem *mem, void *drm_priv) { struct amdgpu_device *adev = get_amdgpu_device(kgd); struct amdgpu_vm *avm = drm_priv_to_vm(drm_priv); @@ -1685,7 +1682,7 @@ int amdgpu_amdkfd_gpuvm_map_memory_to_gpu( entry->va, entry->va + bo_size, entry); ret = map_bo_to_gpuvm(mem, entry, ctx.sync, - is_invalid_userptr, table_freed); + is_invalid_userptr); if (ret) { pr_err("Failed to map bo to gpuvm\n"); goto out_unreserve; @@ -2135,7 +2132,7 @@ static int validate_invalid_user_pages(struct amdkfd_process_info *process_info) continue; kfd_mem_dmaunmap_attachment(mem, attachment); - ret = update_gpuvm_pte(mem, attachment, &sync, NULL); + ret = update_gpuvm_pte(mem, attachment, &sync); if (ret) { pr_err("%s: update PTE failed\n", __func__); /* make sure this gets validated again */ @@ -2341,7 +2338,7 @@ int amdgpu_amdkfd_gpuvm_restore_process_bos(void *info, struct dma_fence **ef) continue; kfd_mem_dmaunmap_attachment(mem, attachment); - ret = update_gpuvm_pte(mem, attachment, &sync_obj, NULL); + ret = update_gpuvm_pte(mem, attachment, &sync_obj); if (ret) { pr_debug("Memory eviction: update PTE failed. Try again\n"); goto validate_map_fail; diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c b/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c index a10e3823b66045..84894cd81ced9c 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c @@ -1393,7 +1393,6 @@ static int kfd_ioctl_map_memory_to_gpu(struct file *filep, long err = 0; int i; uint32_t *devices_arr = NULL; - bool table_freed = false; dev = kfd_device_by_id(GET_GPU_ID(args->handle)); if (!dev) @@ -1451,8 +1450,7 @@ static int kfd_ioctl_map_memory_to_gpu(struct file *filep, goto get_mem_obj_from_handle_failed; } err = amdgpu_amdkfd_gpuvm_map_memory_to_gpu( - peer->kgd, (struct kgd_mem *)mem, - peer_pdd->drm_priv, &table_freed); + peer->kgd, (struct kgd_mem *)mem, peer_pdd->drm_priv); if (err) { pr_err("Failed to map to gpu %d/%d\n", i, args->n_devices); @@ -1470,17 +1468,16 @@ static int kfd_ioctl_map_memory_to_gpu(struct file *filep, } /* Flush TLBs after waiting for the page table updates to complete */ - if (table_freed) { - for (i = 0; i < args->n_devices; i++) { - peer = kfd_device_by_id(devices_arr[i]); - if (WARN_ON_ONCE(!peer)) - continue; - peer_pdd = kfd_get_process_device_data(peer, p); - if (WARN_ON_ONCE(!peer_pdd)) - continue; - kfd_flush_tlb(peer_pdd, TLB_FLUSH_LEGACY); - } + for (i = 0; i < args->n_devices; i++) { + peer = kfd_device_by_id(devices_arr[i]); + if (WARN_ON_ONCE(!peer)) + continue; + peer_pdd = kfd_get_process_device_data(peer, p); + if (WARN_ON_ONCE(!peer_pdd)) + continue; + kfd_flush_tlb(peer_pdd, TLB_FLUSH_LEGACY); } + kfree(devices_arr); return err; diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_process.c b/drivers/gpu/drm/amd/amdkfd/kfd_process.c index 21ec8a18cad299..8a2c6fc438c0b4 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_process.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_process.c @@ -714,8 +714,7 @@ static int kfd_process_alloc_gpuvm(struct kfd_process_device *pdd, if (err) goto err_alloc_mem; - err = amdgpu_amdkfd_gpuvm_map_memory_to_gpu(kdev->kgd, mem, - pdd->drm_priv, NULL); + err = amdgpu_amdkfd_gpuvm_map_memory_to_gpu(kdev->kgd, mem, pdd->drm_priv); if (err) goto err_map_mem; From d605094394ee35afd22f8adbe4a4b7b5f0c143a2 Mon Sep 17 00:00:00 2001 From: Eric Huang Date: Fri, 9 Jul 2021 14:58:16 -0400 Subject: [PATCH 291/714] Revert "drm/amdgpu: Add table_freed parameter to amdgpu_vm_bo_update" This reverts commit 075e8080c1a7571563171a07fa9ce47c4bc80044. Reason for revert: the related commit is reverted. Signed-off-by: Eric Huang Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c | 6 +++--- drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c | 2 +- drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c | 10 +++++----- drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h | 2 +- 4 files changed, 10 insertions(+), 10 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c index 76fe5b71e35de9..30fa1f61e0e556 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c @@ -781,7 +781,7 @@ static int amdgpu_cs_vm_handling(struct amdgpu_cs_parser *p) if (r) return r; - r = amdgpu_vm_bo_update(adev, fpriv->prt_va, false, NULL); + r = amdgpu_vm_bo_update(adev, fpriv->prt_va, false); if (r) return r; @@ -792,7 +792,7 @@ static int amdgpu_cs_vm_handling(struct amdgpu_cs_parser *p) if (amdgpu_mcbp || amdgpu_sriov_vf(adev)) { bo_va = fpriv->csa_va; BUG_ON(!bo_va); - r = amdgpu_vm_bo_update(adev, bo_va, false, NULL); + r = amdgpu_vm_bo_update(adev, bo_va, false); if (r) return r; @@ -811,7 +811,7 @@ static int amdgpu_cs_vm_handling(struct amdgpu_cs_parser *p) if (bo_va == NULL) continue; - r = amdgpu_vm_bo_update(adev, bo_va, false, NULL); + r = amdgpu_vm_bo_update(adev, bo_va, false); if (r) return r; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c index b3404c43a9111e..d0d9bc445d7bc8 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c @@ -612,7 +612,7 @@ static void amdgpu_gem_va_update_vm(struct amdgpu_device *adev, if (operation == AMDGPU_VA_OP_MAP || operation == AMDGPU_VA_OP_REPLACE) { - r = amdgpu_vm_bo_update(adev, bo_va, false, NULL); + r = amdgpu_vm_bo_update(adev, bo_va, false); if (r) goto error; } diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c index 6b43ebab68a11c..078c068937feee 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c @@ -1758,7 +1758,7 @@ int amdgpu_vm_bo_update_mapping(struct amdgpu_device *adev, r = vm->update_funcs->commit(¶ms, fence); if (table_freed) - *table_freed = *table_freed || params.table_freed; + *table_freed = params.table_freed; error_unlock: amdgpu_vm_eviction_unlock(vm); @@ -1823,7 +1823,7 @@ void amdgpu_vm_get_memory(struct amdgpu_vm *vm, uint64_t *vram_mem, * 0 for success, -EINVAL for failure. */ int amdgpu_vm_bo_update(struct amdgpu_device *adev, struct amdgpu_bo_va *bo_va, - bool clear, bool *table_freed) + bool clear) { struct amdgpu_bo *bo = bo_va->base.bo; struct amdgpu_vm *vm = bo_va->base.vm; @@ -1902,7 +1902,7 @@ int amdgpu_vm_bo_update(struct amdgpu_device *adev, struct amdgpu_bo_va *bo_va, resv, mapping->start, mapping->last, update_flags, mapping->offset, mem, - pages_addr, last_update, table_freed); + pages_addr, last_update, NULL); if (r) return r; } @@ -2154,7 +2154,7 @@ int amdgpu_vm_handle_moved(struct amdgpu_device *adev, list_for_each_entry_safe(bo_va, tmp, &vm->moved, base.vm_status) { /* Per VM BOs never need to bo cleared in the page tables */ - r = amdgpu_vm_bo_update(adev, bo_va, false, NULL); + r = amdgpu_vm_bo_update(adev, bo_va, false); if (r) return r; } @@ -2173,7 +2173,7 @@ int amdgpu_vm_handle_moved(struct amdgpu_device *adev, else clear = true; - r = amdgpu_vm_bo_update(adev, bo_va, clear, NULL); + r = amdgpu_vm_bo_update(adev, bo_va, clear); if (r) return r; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h index ddb85a85cbba0c..f8fa653d4da747 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h @@ -406,7 +406,7 @@ int amdgpu_vm_bo_update_mapping(struct amdgpu_device *adev, struct dma_fence **fence, bool *free_table); int amdgpu_vm_bo_update(struct amdgpu_device *adev, struct amdgpu_bo_va *bo_va, - bool clear, bool *table_freed); + bool clear); bool amdgpu_vm_evictable(struct amdgpu_bo *bo); void amdgpu_vm_bo_invalidate(struct amdgpu_device *adev, struct amdgpu_bo *bo, bool evicted); From 5adcd7458a78120dddec6b53c9619acd8f4d5931 Mon Sep 17 00:00:00 2001 From: Eric Huang Date: Fri, 9 Jul 2021 14:59:20 -0400 Subject: [PATCH 292/714] Revert "drm/amdkfd: Add heavy-weight TLB flush after unmapping" This reverts commit 1098d658bef05e5fee634aab0b6a1fa590cfca24. Reason for revert: it causes regressions on several Asics. Signed-off-by: Eric Huang Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdkfd/kfd_chardev.c | 1 - 1 file changed, 1 deletion(-) diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c b/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c index 84894cd81ced9c..e48acdd03c1a9b 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c @@ -1563,7 +1563,6 @@ static int kfd_ioctl_unmap_memory_from_gpu(struct file *filep, i, args->n_devices); goto unmap_memory_from_gpu_failed; } - kfd_flush_tlb(peer_pdd, TLB_FLUSH_HEAVYWEIGHT); args->n_success = i+1; } kfree(devices_arr); From adefab4ef308f43c80537bd28e059eed7f3517e4 Mon Sep 17 00:00:00 2001 From: Aaron Liu Date: Tue, 13 Jul 2021 14:54:47 +0800 Subject: [PATCH 293/714] drm/amd/pm: Add waiting for response of mode-reset message for yellow carp Remove mdelay process and use smu_cmn_send_smc_msg_with_param to send mode-reset message to SMC. Signed-off-by: Aaron Liu Reviewed-by: Evan Quan Reviewed-by: Huang Rui Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/pm/swsmu/smu13/yellow_carp_ppt.c | 10 +++------- 1 file changed, 3 insertions(+), 7 deletions(-) diff --git a/drivers/gpu/drm/amd/pm/swsmu/smu13/yellow_carp_ppt.c b/drivers/gpu/drm/amd/pm/swsmu/smu13/yellow_carp_ppt.c index de92f713a91185..0cfeb9fc7c03c3 100644 --- a/drivers/gpu/drm/amd/pm/swsmu/smu13/yellow_carp_ppt.c +++ b/drivers/gpu/drm/amd/pm/swsmu/smu13/yellow_carp_ppt.c @@ -298,13 +298,9 @@ static int yellow_carp_mode_reset(struct smu_context *smu, int type) if (index < 0) return index == -EACCES ? 0 : index; - mutex_lock(&smu->message_lock); - - ret = smu_cmn_send_msg_without_waiting(smu, (uint16_t)index, type); - - mutex_unlock(&smu->message_lock); - - mdelay(10); + ret = smu_cmn_send_smc_msg_with_param(smu, (uint16_t)index, type, NULL); + if (ret) + dev_err(smu->adev->dev, "Failed to mode reset!\n"); return ret; } From 01757f536ac825e3614d583fee9acb48c64ed084 Mon Sep 17 00:00:00 2001 From: Yajun Deng Date: Tue, 13 Jul 2021 10:48:24 +0800 Subject: [PATCH 294/714] net: Use nlmsg_unicast() instead of netlink_unicast() It has 'if (err >0 )' statement in nlmsg_unicast(), so use nlmsg_unicast() instead of netlink_unicast(), this looks more concise. v2: remove the change in netfilter. Signed-off-by: Yajun Deng Reviewed-by: David Ahern Signed-off-by: David S. Miller --- net/ipv4/fib_frontend.c | 2 +- net/ipv4/inet_diag.c | 5 +---- net/ipv4/raw_diag.c | 7 ++----- net/ipv4/udp_diag.c | 6 ++---- net/mptcp/mptcp_diag.c | 6 ++---- net/netlink/af_netlink.c | 2 +- net/sctp/diag.c | 6 ++---- net/unix/diag.c | 6 ++---- 8 files changed, 13 insertions(+), 27 deletions(-) diff --git a/net/ipv4/fib_frontend.c b/net/ipv4/fib_frontend.c index a933bd6345b1db..9fe13e4f5d08a5 100644 --- a/net/ipv4/fib_frontend.c +++ b/net/ipv4/fib_frontend.c @@ -1376,7 +1376,7 @@ static void nl_fib_input(struct sk_buff *skb) portid = NETLINK_CB(skb).portid; /* netlink portid */ NETLINK_CB(skb).portid = 0; /* from kernel */ NETLINK_CB(skb).dst_group = 0; /* unicast */ - netlink_unicast(net->ipv4.fibnl, skb, portid, MSG_DONTWAIT); + nlmsg_unicast(net->ipv4.fibnl, skb, portid); } static int __net_init nl_fib_lookup_init(struct net *net) diff --git a/net/ipv4/inet_diag.c b/net/ipv4/inet_diag.c index e65f4ef024a4fd..ef7897226f08e5 100644 --- a/net/ipv4/inet_diag.c +++ b/net/ipv4/inet_diag.c @@ -580,10 +580,7 @@ int inet_diag_dump_one_icsk(struct inet_hashinfo *hashinfo, nlmsg_free(rep); goto out; } - err = netlink_unicast(net->diag_nlsk, rep, NETLINK_CB(in_skb).portid, - MSG_DONTWAIT); - if (err > 0) - err = 0; + err = nlmsg_unicast(net->diag_nlsk, rep, NETLINK_CB(in_skb).portid); out: if (sk) diff --git a/net/ipv4/raw_diag.c b/net/ipv4/raw_diag.c index 1b5b8af27aafa7..ccacbde30a2c50 100644 --- a/net/ipv4/raw_diag.c +++ b/net/ipv4/raw_diag.c @@ -119,11 +119,8 @@ static int raw_diag_dump_one(struct netlink_callback *cb, return err; } - err = netlink_unicast(net->diag_nlsk, rep, - NETLINK_CB(in_skb).portid, - MSG_DONTWAIT); - if (err > 0) - err = 0; + err = nlmsg_unicast(net->diag_nlsk, rep, NETLINK_CB(in_skb).portid); + return err; } diff --git a/net/ipv4/udp_diag.c b/net/ipv4/udp_diag.c index b2cee9a307d4c2..1ed8c4d78e5cac 100644 --- a/net/ipv4/udp_diag.c +++ b/net/ipv4/udp_diag.c @@ -77,10 +77,8 @@ static int udp_dump_one(struct udp_table *tbl, kfree_skb(rep); goto out; } - err = netlink_unicast(net->diag_nlsk, rep, NETLINK_CB(in_skb).portid, - MSG_DONTWAIT); - if (err > 0) - err = 0; + err = nlmsg_unicast(net->diag_nlsk, rep, NETLINK_CB(in_skb).portid); + out: if (sk) sock_put(sk); diff --git a/net/mptcp/mptcp_diag.c b/net/mptcp/mptcp_diag.c index 8f88ddeab6a2e2..f48eb6315bbb4b 100644 --- a/net/mptcp/mptcp_diag.c +++ b/net/mptcp/mptcp_diag.c @@ -57,10 +57,8 @@ static int mptcp_diag_dump_one(struct netlink_callback *cb, kfree_skb(rep); goto out; } - err = netlink_unicast(net->diag_nlsk, rep, NETLINK_CB(in_skb).portid, - MSG_DONTWAIT); - if (err > 0) - err = 0; + err = nlmsg_unicast(net->diag_nlsk, rep, NETLINK_CB(in_skb).portid); + out: sock_put(sk); diff --git a/net/netlink/af_netlink.c b/net/netlink/af_netlink.c index d233ac4a91b677..380f95aacdec93 100644 --- a/net/netlink/af_netlink.c +++ b/net/netlink/af_netlink.c @@ -2471,7 +2471,7 @@ void netlink_ack(struct sk_buff *in_skb, struct nlmsghdr *nlh, int err, nlmsg_end(skb, rep); - netlink_unicast(in_skb->sk, skb, NETLINK_CB(in_skb).portid, MSG_DONTWAIT); + nlmsg_unicast(in_skb->sk, skb, NETLINK_CB(in_skb).portid); } EXPORT_SYMBOL(netlink_ack); diff --git a/net/sctp/diag.c b/net/sctp/diag.c index 493fc01e5d2b7c..760b367644c12b 100644 --- a/net/sctp/diag.c +++ b/net/sctp/diag.c @@ -284,10 +284,8 @@ static int sctp_tsp_dump_one(struct sctp_transport *tsp, void *p) goto out; } - err = netlink_unicast(net->diag_nlsk, rep, NETLINK_CB(in_skb).portid, - MSG_DONTWAIT); - if (err > 0) - err = 0; + err = nlmsg_unicast(net->diag_nlsk, rep, NETLINK_CB(in_skb).portid); + out: return err; } diff --git a/net/unix/diag.c b/net/unix/diag.c index 9ff64f9df1f3bb..7e7d7f45685af6 100644 --- a/net/unix/diag.c +++ b/net/unix/diag.c @@ -295,10 +295,8 @@ static int unix_diag_get_exact(struct sk_buff *in_skb, goto again; } - err = netlink_unicast(net->diag_nlsk, rep, NETLINK_CB(in_skb).portid, - MSG_DONTWAIT); - if (err > 0) - err = 0; + err = nlmsg_unicast(net->diag_nlsk, rep, NETLINK_CB(in_skb).portid); + out: if (sk) sock_put(sk); From e56c6bbd98dc1cefb6f9c5d795fd29016e4f2fe7 Mon Sep 17 00:00:00 2001 From: Vladimir Oltean Date: Tue, 13 Jul 2021 12:33:50 +0300 Subject: [PATCH 295/714] net: ocelot: fix switchdev objects synced for wrong netdev with LAG offload The point with a *dev and a *brport_dev is that when we have a LAG net device that is a bridge port, *dev is an ocelot net device and *brport_dev is the bonding/team net device. The ocelot net device beneath the LAG does not exist from the bridge's perspective, so we need to sync the switchdev objects belonging to the brport_dev and not to the dev. Fixes: e4bd44e89dcf ("net: ocelot: replay switchdev events when joining bridge") Signed-off-by: Vladimir Oltean Signed-off-by: David S. Miller --- drivers/net/ethernet/mscc/ocelot_net.c | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/drivers/net/ethernet/mscc/ocelot_net.c b/drivers/net/ethernet/mscc/ocelot_net.c index 3e89e34f86d59f..e9d260d84bf330 100644 --- a/drivers/net/ethernet/mscc/ocelot_net.c +++ b/drivers/net/ethernet/mscc/ocelot_net.c @@ -1298,6 +1298,7 @@ static int ocelot_netdevice_lag_leave(struct net_device *dev, } static int ocelot_netdevice_changeupper(struct net_device *dev, + struct net_device *brport_dev, struct netdev_notifier_changeupper_info *info) { struct netlink_ext_ack *extack; @@ -1307,11 +1308,11 @@ static int ocelot_netdevice_changeupper(struct net_device *dev, if (netif_is_bridge_master(info->upper_dev)) { if (info->linking) - err = ocelot_netdevice_bridge_join(dev, dev, + err = ocelot_netdevice_bridge_join(dev, brport_dev, info->upper_dev, extack); else - err = ocelot_netdevice_bridge_leave(dev, dev, + err = ocelot_netdevice_bridge_leave(dev, brport_dev, info->upper_dev); } if (netif_is_lag_master(info->upper_dev)) { @@ -1346,7 +1347,7 @@ ocelot_netdevice_lag_changeupper(struct net_device *dev, if (ocelot_port->bond != dev) return NOTIFY_OK; - err = ocelot_netdevice_changeupper(lower, info); + err = ocelot_netdevice_changeupper(lower, dev, info); if (err) return notifier_from_errno(err); } @@ -1385,7 +1386,7 @@ static int ocelot_netdevice_event(struct notifier_block *unused, struct netdev_notifier_changeupper_info *info = ptr; if (ocelot_netdevice_dev_check(dev)) - return ocelot_netdevice_changeupper(dev, info); + return ocelot_netdevice_changeupper(dev, dev, info); if (netif_is_lag_master(dev)) return ocelot_netdevice_lag_changeupper(dev, info); From b0b33b048dcfbd7da82c3cde4fab02751dfab4d6 Mon Sep 17 00:00:00 2001 From: Vladimir Oltean Date: Tue, 13 Jul 2021 12:37:19 +0300 Subject: [PATCH 296/714] net: dsa: sja1105: fix address learning getting disabled on the CPU port In May 2019 when commit 640f763f98c2 ("net: dsa: sja1105: Add support for Spanning Tree Protocol") was introduced, the comment that "STP does not get called for the CPU port" was true. This changed after commit 0394a63acfe2 ("net: dsa: enable and disable all ports") in August 2019 and went largely unnoticed, because the sja1105_bridge_stp_state_set() method did nothing different compared to the static setup done by sja1105_init_mac_settings(). With the ability to turn address learning off introduced by the blamed commit, there is a new priv->learn_ena port mask in the driver. When sja1105_bridge_stp_state_set() gets called and we are in BR_STATE_LEARNING or later, address learning is enabled or not depending on priv->learn_ena & BIT(port). So what happens is that priv->learn_ena is not being set from anywhere for the CPU port, and the static configuration done by sja1105_init_mac_settings() is being overwritten. To solve this, acknowledge that the static configuration of STP state is no longer necessary because the STP state is being set by the DSA core now, but what is necessary is to set priv->learn_ena for the CPU port. Fixes: 4d9423549501 ("net: dsa: sja1105: offload bridge port flags to device") Signed-off-by: Vladimir Oltean Signed-off-by: David S. Miller --- drivers/net/dsa/sja1105/sja1105_main.c | 14 ++++++-------- 1 file changed, 6 insertions(+), 8 deletions(-) diff --git a/drivers/net/dsa/sja1105/sja1105_main.c b/drivers/net/dsa/sja1105/sja1105_main.c index 4f0545605f6bcf..ced8c9cb29c297 100644 --- a/drivers/net/dsa/sja1105/sja1105_main.c +++ b/drivers/net/dsa/sja1105/sja1105_main.c @@ -122,14 +122,12 @@ static int sja1105_init_mac_settings(struct sja1105_private *priv) for (i = 0; i < ds->num_ports; i++) { mac[i] = default_mac; - if (i == dsa_upstream_port(priv->ds, i)) { - /* STP doesn't get called for CPU port, so we need to - * set the I/O parameters statically. - */ - mac[i].dyn_learn = true; - mac[i].ingress = true; - mac[i].egress = true; - } + + /* Let sja1105_bridge_stp_state_set() keep address learning + * enabled for the CPU port. + */ + if (dsa_is_cpu_port(ds, i)) + priv->learn_ena |= BIT(i); } return 0; From deb7178eb940e2c5caca1b1db084a69b2e59b4c9 Mon Sep 17 00:00:00 2001 From: Pavel Skripkin Date: Tue, 13 Jul 2021 13:58:53 +0300 Subject: [PATCH 297/714] net: fddi: fix UAF in fza_probe fp is netdev private data and it cannot be used after free_netdev() call. Using fp after free_netdev() can cause UAF bug. Fix it by moving free_netdev() after error message. Fixes: 61414f5ec983 ("FDDI: defza: Add support for DEC FDDIcontroller 700 TURBOchannel adapter") Signed-off-by: Pavel Skripkin Signed-off-by: David S. Miller --- drivers/net/fddi/defza.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/drivers/net/fddi/defza.c b/drivers/net/fddi/defza.c index 14f07050b6b1b9..0de2c4552f5eb3 100644 --- a/drivers/net/fddi/defza.c +++ b/drivers/net/fddi/defza.c @@ -1504,9 +1504,8 @@ static int fza_probe(struct device *bdev) release_mem_region(start, len); err_out_kfree: - free_netdev(dev); - pr_err("%s: initialization failure, aborting!\n", fp->name); + free_netdev(dev); return ret; } From 2e7ea96924acc502929c3ffa0fcbdaeec00b2208 Mon Sep 17 00:00:00 2001 From: "Gustavo A. R. Silva" Date: Tue, 13 Jul 2021 11:51:58 -0500 Subject: [PATCH 298/714] cpufreq: Fix fall-through warning for Clang In preparation to enable -Wimplicit-fallthrough for Clang, fix a fallthrough warning by simply dropping the empty default case at the bottom. Link: https://github.com/KSPP/linux/issues/115 Signed-off-by: Gustavo A. R. Silva --- drivers/cpufreq/longhaul.c | 2 -- 1 file changed, 2 deletions(-) diff --git a/drivers/cpufreq/longhaul.c b/drivers/cpufreq/longhaul.c index 182a4dbca0952c..c538a153ee8284 100644 --- a/drivers/cpufreq/longhaul.c +++ b/drivers/cpufreq/longhaul.c @@ -942,8 +942,6 @@ static int __init longhaul_init(void) return cpufreq_register_driver(&longhaul_driver); case 10: pr_err("Use acpi-cpufreq driver for VIA C7\n"); - default: - ; } return -ENODEV; From f28100cb9c9645c07cbd22431278ac9492f6a01c Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=C3=8D=C3=B1igo=20Huguet?= Date: Tue, 13 Jul 2021 16:21:27 +0200 Subject: [PATCH 299/714] sfc: fix lack of XDP TX queues - error XDP TX failed (-22) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Fixes: e26ca4b53582 sfc: reduce the number of requested xdp ev queues The buggy commit intended to allocate less channels for XDP in order to be more unlikely to reach the limit of 32 channels of the driver. The idea was to use each IRQ/eventqeue for more XDP TX queues than before, calculating which is the maximum number of TX queues that one event queue can handle. For example, in EF10 each event queue could handle up to 8 queues, better than the 4 they were handling before the change. This way, it would have to allocate half of channels than before for XDP TX. The problem is that the TX queues are also contained inside the channel structs, and there are only 4 queues per channel. Reducing the number of channels means also reducing the number of queues, resulting in not having the desired number of 1 queue per CPU. This leads to getting errors on XDP_TX and XDP_REDIRECT if they're executed from a high numbered CPU, because there only exist queues for the low half of CPUs, actually. If XDP_TX/REDIRECT is executed in a low numbered CPU, the error doesn't happen. This is the error in the logs (repeated many times, even rate limited): sfc 0000:5e:00.0 ens3f0np0: XDP TX failed (-22) This errors happens in function efx_xdp_tx_buffers, where it expects to have a dedicated XDP TX queue per CPU. Reverting the change makes again more likely to reach the limit of 32 channels in machines with many CPUs. If this happen, no XDP_TX/REDIRECT will be possible at all, and we will have this log error messages: At interface probe: sfc 0000:5e:00.0: Insufficient resources for 12 XDP event queues (24 other channels, max 32) At every subsequent XDP_TX/REDIRECT failure, rate limited: sfc 0000:5e:00.0 ens3f0np0: XDP TX failed (-22) However, without reverting the change, it makes the user to think that everything is OK at probe time, but later it fails in an unpredictable way, depending on the CPU that handles the packet. It is better to restore the predictable behaviour. If the user sees the error message at probe time, he/she can try to configure the best way it fits his/her needs. At least, he/she will have 2 options: - Accept that XDP_TX/REDIRECT is not available (he/she may not need it) - Load sfc module with modparam 'rss_cpus' with a lower number, thus creating less normal RX queues/channels, letting more free resources for XDP, with some performance penalty. Anyway, let the calculation of maximum TX queues that can be handled by a single event queue, and use it only if it's less than the number of TX queues per channel. This doesn't happen in practice, but could happen if some constant values are tweaked in the future, such us EFX_MAX_TXQ_PER_CHANNEL, EFX_MAX_EVQ_SIZE or EFX_MAX_DMAQ_SIZE. Related mailing list thread: https://lore.kernel.org/bpf/20201215104327.2be76156@carbon/ Signed-off-by: Íñigo Huguet Signed-off-by: David S. Miller --- drivers/net/ethernet/sfc/efx_channels.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/net/ethernet/sfc/efx_channels.c b/drivers/net/ethernet/sfc/efx_channels.c index a3ca406a356112..5b71f8a03a6d5e 100644 --- a/drivers/net/ethernet/sfc/efx_channels.c +++ b/drivers/net/ethernet/sfc/efx_channels.c @@ -152,6 +152,7 @@ static int efx_allocate_msix_channels(struct efx_nic *efx, * maximum size. */ tx_per_ev = EFX_MAX_EVQ_SIZE / EFX_TXQ_MAX_ENT(efx); + tx_per_ev = min(tx_per_ev, EFX_MAX_TXQ_PER_CHANNEL); n_xdp_tx = num_possible_cpus(); n_xdp_ev = DIV_ROUND_UP(n_xdp_tx, tx_per_ev); @@ -181,7 +182,7 @@ static int efx_allocate_msix_channels(struct efx_nic *efx, efx->xdp_tx_queue_count = 0; } else { efx->n_xdp_channels = n_xdp_ev; - efx->xdp_tx_per_channel = EFX_MAX_TXQ_PER_CHANNEL; + efx->xdp_tx_per_channel = tx_per_ev; efx->xdp_tx_queue_count = n_xdp_tx; n_channels += n_xdp_ev; netif_dbg(efx, drv, efx->net_dev, From 788bc000d4c2f25232db19ab3a0add0ba4e27671 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=C3=8D=C3=B1igo=20Huguet?= Date: Tue, 13 Jul 2021 16:21:28 +0200 Subject: [PATCH 300/714] sfc: ensure correct number of XDP queues MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Commit 99ba0ea616aa ("sfc: adjust efx->xdp_tx_queue_count with the real number of initialized queues") intended to fix a problem caused by a round up when calculating the number of XDP channels and queues. However, this was not the real problem. The real problem was that the number of XDP TX queues had been reduced to half in commit e26ca4b53582 ("sfc: reduce the number of requested xdp ev queues"), but the variable xdp_tx_queue_count had remained the same. Once the correct number of XDP TX queues is created again in the previous patch of this series, this also can be reverted since the error doesn't actually exist. Only in the case that there is a bug in the code we can have different values in xdp_queue_number and efx->xdp_tx_queue_count. Because of this, and per Edward Cree's suggestion, I add instead a WARN_ON to catch if it happens again in the future. Note that the number of allocated queues can be higher than the number of used ones due to the round up, as explained in the existing comment in the code. That's why we also have to stop increasing xdp_queue_number beyond efx->xdp_tx_queue_count. Signed-off-by: Íñigo Huguet Signed-off-by: David S. Miller --- drivers/net/ethernet/sfc/efx_channels.c | 15 ++++++++------- 1 file changed, 8 insertions(+), 7 deletions(-) diff --git a/drivers/net/ethernet/sfc/efx_channels.c b/drivers/net/ethernet/sfc/efx_channels.c index 5b71f8a03a6d5e..bb48a139dd158d 100644 --- a/drivers/net/ethernet/sfc/efx_channels.c +++ b/drivers/net/ethernet/sfc/efx_channels.c @@ -892,18 +892,20 @@ int efx_set_channels(struct efx_nic *efx) if (efx_channel_is_xdp_tx(channel)) { efx_for_each_channel_tx_queue(tx_queue, channel) { tx_queue->queue = next_queue++; - netif_dbg(efx, drv, efx->net_dev, "Channel %u TXQ %u is XDP %u, HW %u\n", - channel->channel, tx_queue->label, - xdp_queue_number, tx_queue->queue); + /* We may have a few left-over XDP TX * queues owing to xdp_tx_queue_count * not dividing evenly by EFX_MAX_TXQ_PER_CHANNEL. * We still allocate and probe those * TXQs, but never use them. */ - if (xdp_queue_number < efx->xdp_tx_queue_count) + if (xdp_queue_number < efx->xdp_tx_queue_count) { + netif_dbg(efx, drv, efx->net_dev, "Channel %u TXQ %u is XDP %u, HW %u\n", + channel->channel, tx_queue->label, + xdp_queue_number, tx_queue->queue); efx->xdp_tx_queues[xdp_queue_number] = tx_queue; - xdp_queue_number++; + xdp_queue_number++; + } } } else { efx_for_each_channel_tx_queue(tx_queue, channel) { @@ -915,8 +917,7 @@ int efx_set_channels(struct efx_nic *efx) } } } - if (xdp_queue_number) - efx->xdp_tx_queue_count = xdp_queue_number; + WARN_ON(xdp_queue_number != efx->xdp_tx_queue_count); rc = netif_set_real_num_tx_queues(efx->net_dev, efx->n_tx_channels); if (rc) From d2a16bde77322fca6b6f36ebe19097a1c3c46f74 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=C3=8D=C3=B1igo=20Huguet?= Date: Tue, 13 Jul 2021 16:21:29 +0200 Subject: [PATCH 301/714] sfc: add logs explaining XDP_TX/REDIRECT is not available MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit If it's not possible to allocate enough channels for XDP, XDP_TX and XDP_REDIRECT don't work. However, only a message saying that not enough channels were available was shown, but not saying what are the consequences in that case. The user didn't know if he/she can use XDP or not, if the performance is reduced, or what. Signed-off-by: Íñigo Huguet Signed-off-by: David S. Miller --- drivers/net/ethernet/sfc/efx_channels.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/drivers/net/ethernet/sfc/efx_channels.c b/drivers/net/ethernet/sfc/efx_channels.c index bb48a139dd158d..e5b0d795c3012c 100644 --- a/drivers/net/ethernet/sfc/efx_channels.c +++ b/drivers/net/ethernet/sfc/efx_channels.c @@ -170,6 +170,8 @@ static int efx_allocate_msix_channels(struct efx_nic *efx, netif_err(efx, drv, efx->net_dev, "Insufficient resources for %d XDP event queues (%d other channels, max %d)\n", n_xdp_ev, n_channels, max_channels); + netif_err(efx, drv, efx->net_dev, + "XDP_TX and XDP_REDIRECT will not work on this interface"); efx->n_xdp_channels = 0; efx->xdp_tx_per_channel = 0; efx->xdp_tx_queue_count = 0; @@ -177,6 +179,8 @@ static int efx_allocate_msix_channels(struct efx_nic *efx, netif_err(efx, drv, efx->net_dev, "Insufficient resources for %d XDP TX queues (%d other channels, max VIs %d)\n", n_xdp_tx, n_channels, efx->max_vis); + netif_err(efx, drv, efx->net_dev, + "XDP_TX and XDP_REDIRECT will not work on this interface"); efx->n_xdp_channels = 0; efx->xdp_tx_per_channel = 0; efx->xdp_tx_queue_count = 0; From 420405ecde061fde76d67bd3a67577a563ea758e Mon Sep 17 00:00:00 2001 From: Bart Van Assche Date: Tue, 13 Jul 2021 10:49:26 -0700 Subject: [PATCH 302/714] configfs: fix the read and write iterators Commit 7fe1e79b59ba ("configfs: implement the .read_iter and .write_iter methods") changed the simple_read_from_buffer() calls into copy_to_iter() calls and the simple_write_to_buffer() calls into copy_from_iter() calls. The simple*buffer() methods update the file offset (*ppos) but the read and write iterators not yet. Make the read and write iterators update the file offset (iocb->ki_pos). This patch has been tested as follows: # modprobe target_core_user # dd if=/sys/kernel/config/target/dbroot bs=1 /var/target 12+0 records in 12+0 records out 12 bytes copied, 9.5539e-05 s, 126 kB/s # cd /sys/kernel/config/acpi/table # mkdir test # cd test # dmesg -c >/dev/null; printf 'SSDT\x8\0\0\0abcdefghijklmnopqrstuvwxyz' | dd of=aml bs=1; dmesg -c 34+0 records in 34+0 records out 34 bytes copied, 0.010627 s, 3.2 kB/s [ 261.056551] ACPI configfs: invalid table length Reported-by: Yanko Kaneti Cc: Yanko Kaneti Fixes: 7fe1e79b59ba ("configfs: implement the .read_iter and .write_iter methods") Signed-off-by: Bart Van Assche Signed-off-by: Christoph Hellwig --- fs/configfs/file.c | 29 ++++++++++++++++++++++------- 1 file changed, 22 insertions(+), 7 deletions(-) diff --git a/fs/configfs/file.c b/fs/configfs/file.c index 2f63bf3a7325f0..5a0be9985bae1e 100644 --- a/fs/configfs/file.c +++ b/fs/configfs/file.c @@ -91,7 +91,10 @@ static ssize_t configfs_read_iter(struct kiocb *iocb, struct iov_iter *to) } pr_debug("%s: count = %zd, pos = %lld, buf = %s\n", __func__, iov_iter_count(to), iocb->ki_pos, buffer->page); - retval = copy_to_iter(buffer->page, buffer->count, to); + if (iocb->ki_pos >= buffer->count) + goto out; + retval = copy_to_iter(buffer->page + iocb->ki_pos, + buffer->count - iocb->ki_pos, to); iocb->ki_pos += retval; if (retval == 0) retval = -EFAULT; @@ -162,7 +165,10 @@ static ssize_t configfs_bin_read_iter(struct kiocb *iocb, struct iov_iter *to) buffer->needs_read_fill = 0; } - retval = copy_to_iter(buffer->bin_buffer, buffer->bin_buffer_size, to); + if (iocb->ki_pos >= buffer->bin_buffer_size) + goto out; + retval = copy_to_iter(buffer->bin_buffer + iocb->ki_pos, + buffer->bin_buffer_size - iocb->ki_pos, to); iocb->ki_pos += retval; if (retval == 0) retval = -EFAULT; @@ -171,21 +177,28 @@ static ssize_t configfs_bin_read_iter(struct kiocb *iocb, struct iov_iter *to) return retval; } -static int fill_write_buffer(struct configfs_buffer *buffer, +/* Fill [buffer, buffer + pos) with data coming from @from. */ +static int fill_write_buffer(struct configfs_buffer *buffer, loff_t pos, struct iov_iter *from) { + loff_t to_copy; int copied; + u8 *to; if (!buffer->page) buffer->page = (char *)__get_free_pages(GFP_KERNEL, 0); if (!buffer->page) return -ENOMEM; - copied = copy_from_iter(buffer->page, SIMPLE_ATTR_SIZE - 1, from); + to_copy = SIMPLE_ATTR_SIZE - 1 - pos; + if (to_copy <= 0) + return 0; + to = buffer->page + pos; + copied = copy_from_iter(to, to_copy, from); buffer->needs_read_fill = 1; /* if buf is assumed to contain a string, terminate it by \0, * so e.g. sscanf() can scan the string easily */ - buffer->page[copied] = 0; + to[copied] = 0; return copied ? : -EFAULT; } @@ -217,7 +230,7 @@ static ssize_t configfs_write_iter(struct kiocb *iocb, struct iov_iter *from) ssize_t len; mutex_lock(&buffer->mutex); - len = fill_write_buffer(buffer, from); + len = fill_write_buffer(buffer, iocb->ki_pos, from); if (len > 0) len = flush_write_buffer(file, buffer, len); if (len > 0) @@ -272,7 +285,9 @@ static ssize_t configfs_bin_write_iter(struct kiocb *iocb, buffer->bin_buffer_size = end_offset; } - len = copy_from_iter(buffer->bin_buffer, buffer->bin_buffer_size, from); + len = copy_from_iter(buffer->bin_buffer + iocb->ki_pos, + buffer->bin_buffer_size - iocb->ki_pos, from); + iocb->ki_pos += len; out: mutex_unlock(&buffer->mutex); return len ? : -EFAULT; From f336a009f8e3dd0b47168565584608a4a62cbbb4 Mon Sep 17 00:00:00 2001 From: "Gustavo A. R. Silva" Date: Tue, 13 Jul 2021 13:09:06 -0500 Subject: [PATCH 303/714] math-emu: Fix fall-through warning Fix the following fallthrough warning (nds32-randconfig with GCC): include/math-emu/op-common.h:332:8: warning: this statement may fall through [-Wimplicit-fallthrough=] Reported-by: kernel test robot Link: https://lore.kernel.org/lkml/60edca25.k00ut905IFBjPyt5%25lkp@intel.com/ Signed-off-by: Gustavo A. R. Silva --- include/math-emu/op-common.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/include/math-emu/op-common.h b/include/math-emu/op-common.h index 143568d64b207e..4b57bbba588afd 100644 --- a/include/math-emu/op-common.h +++ b/include/math-emu/op-common.h @@ -338,7 +338,7 @@ do { \ FP_SET_EXCEPTION(FP_EX_INVALID | FP_EX_INVALID_ISI); \ break; \ } \ - /* FALLTHRU */ \ + fallthrough; \ \ case _FP_CLS_COMBINE(FP_CLS_INF,FP_CLS_NORMAL): \ case _FP_CLS_COMBINE(FP_CLS_INF,FP_CLS_ZERO): \ From c8698340e0e5897c30811f8e7e86bcebe53e0519 Mon Sep 17 00:00:00 2001 From: "Gustavo A. R. Silva" Date: Tue, 13 Jul 2021 13:13:40 -0500 Subject: [PATCH 304/714] video: fbdev: Fix fall-through warning for Clang Fix the following fallthrough warning (arm64-randconfig with Clang): drivers/video/fbdev/xilinxfb.c:244:2: warning: unannotated fall-through between switch labels [-Wimplicit-fallthrough] Reported-by: kernel test robot Link: https://lore.kernel.org/lkml/60edca25.k00ut905IFBjPyt5%25lkp@intel.com/ Signed-off-by: Gustavo A. R. Silva --- drivers/video/fbdev/xilinxfb.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/video/fbdev/xilinxfb.c b/drivers/video/fbdev/xilinxfb.c index ffbf900648d90e..438e2c78142f92 100644 --- a/drivers/video/fbdev/xilinxfb.c +++ b/drivers/video/fbdev/xilinxfb.c @@ -241,6 +241,8 @@ xilinx_fb_blank(int blank_mode, struct fb_info *fbi) case FB_BLANK_POWERDOWN: /* turn off panel */ xilinx_fb_out32(drvdata, REG_CTRL, 0); + break; + default: break; } From 223fa873facce6eef165009294d01e18c191c609 Mon Sep 17 00:00:00 2001 From: "Gustavo A. R. Silva" Date: Tue, 13 Jul 2021 13:18:14 -0500 Subject: [PATCH 305/714] scsi: libsas: Fix fall-through warning for Clang Fix the following fallthrough warning (arm64-randconfig with Clang): drivers/scsi/libsas/sas_discover.c:467:2: warning: unannotated fall-through between switch labels [-Wimplicit-fallthrough] Reported-by: kernel test robot Link: https://lore.kernel.org/lkml/60edca25.k00ut905IFBjPyt5%25lkp@intel.com/ Signed-off-by: Gustavo A. R. Silva --- drivers/scsi/libsas/sas_discover.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/scsi/libsas/sas_discover.c b/drivers/scsi/libsas/sas_discover.c index 9f5068f3bcfb2c..dd205414e505cb 100644 --- a/drivers/scsi/libsas/sas_discover.c +++ b/drivers/scsi/libsas/sas_discover.c @@ -461,7 +461,7 @@ static void sas_discover_domain(struct work_struct *work) break; #else pr_notice("ATA device seen but CONFIG_SCSI_SAS_ATA=N so cannot attach\n"); - /* Fall through */ + fallthrough; #endif /* Fall through - only for the #else condition above. */ default: From 54325d0849d60ed52ee9316f76d116b52b53669b Mon Sep 17 00:00:00 2001 From: "Gustavo A. R. Silva" Date: Tue, 13 Jul 2021 13:21:22 -0500 Subject: [PATCH 306/714] PCI: Fix fall-through warning for Clang Fix the following fallthrough warning (arm64-randconfig with Clang): drivers/pci/proc.c:234:3: warning: fallthrough annotation in unreachable code [-Wimplicit-fallthrough] Reported-by: kernel test robot Link: https://lore.kernel.org/lkml/60edca25.k00ut905IFBjPyt5%25lkp@intel.com/ Signed-off-by: Gustavo A. R. Silva --- drivers/pci/proc.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/pci/proc.c b/drivers/pci/proc.c index 9bab07302bbf20..d32fbfc93ea93c 100644 --- a/drivers/pci/proc.c +++ b/drivers/pci/proc.c @@ -230,8 +230,8 @@ static long proc_bus_pci_ioctl(struct file *file, unsigned int cmd, break; } /* If arch decided it can't, fall through... */ -#endif /* HAVE_PCI_MMAP */ fallthrough; +#endif /* HAVE_PCI_MMAP */ default: ret = -EINVAL; break; From f95deaeca8e106af891061e5f9c745dc7dc172c0 Mon Sep 17 00:00:00 2001 From: "Gustavo A. R. Silva" Date: Tue, 13 Jul 2021 13:23:21 -0500 Subject: [PATCH 307/714] mmc: jz4740: Fix fall-through warning for Clang Fix the following fallthrough warning (mips-randconfig with Clang): drivers/mmc/host/jz4740_mmc.c:792:2: warning: unannotated fall-through between switch labels [-Wimplicit-fallthrough] Reported-by: kernel test robot Link: https://lore.kernel.org/lkml/60edca25.k00ut905IFBjPyt5%25lkp@intel.com/ Signed-off-by: Gustavo A. R. Silva --- drivers/mmc/host/jz4740_mmc.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/mmc/host/jz4740_mmc.c b/drivers/mmc/host/jz4740_mmc.c index 0db17bcc9c1638..cb1a64a5c256f8 100644 --- a/drivers/mmc/host/jz4740_mmc.c +++ b/drivers/mmc/host/jz4740_mmc.c @@ -789,6 +789,8 @@ static irqreturn_t jz_mmc_irq_worker(int irq, void *devid) break; } } + fallthrough; + case JZ4740_MMC_STATE_DONE: break; } From 5a1ab5c0299a7b3b9e7ec52c327f44d98b8e001d Mon Sep 17 00:00:00 2001 From: "Gustavo A. R. Silva" Date: Tue, 13 Jul 2021 13:25:06 -0500 Subject: [PATCH 308/714] iommu/arm-smmu-v3: Fix fall-through warning for Clang Fix the following fallthrough warning (arm64-randconfig with Clang): drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c:382:2: warning: unannotated fall-through between switch labels [-Wimplicit-fallthrough] Reported-by: kernel test robot Link: https://lore.kernel.org/lkml/60edca25.k00ut905IFBjPyt5%25lkp@intel.com/ Signed-off-by: Gustavo A. R. Silva --- drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c index dd20b01771c4bd..235f9bdaeaf223 100644 --- a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c +++ b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c @@ -379,6 +379,7 @@ static void arm_smmu_cmdq_skip_err(struct arm_smmu_device *smmu) switch (idx) { case CMDQ_ERR_CERROR_ABT_IDX: dev_err(smmu->dev, "retrying command fetch\n"); + return; case CMDQ_ERR_CERROR_NONE_IDX: return; case CMDQ_ERR_CERROR_ATC_INV_IDX: From 41614014a7afbbce5b61bfce5ce89ee3455143e7 Mon Sep 17 00:00:00 2001 From: "Gustavo A. R. Silva" Date: Tue, 13 Jul 2021 13:27:08 -0500 Subject: [PATCH 309/714] dmaengine: ipu: Fix fall-through warning for Clang Fix the following fallthrough warnings (arm64-randconfig): drivers/dma/ipu/ipu_idmac.c:621:2: warning: unannotated fall-through between switch labels [-Wimplicit-fallthrough] rivers/dma/ipu/ipu_idmac.c:981:2: warning: unannotated fall-through between switch labels [-Wimplicit-fallthrough] Reported-by: kernel test robot Link: https://lore.kernel.org/lkml/60edca25.k00ut905IFBjPyt5%25lkp@intel.com/ Signed-off-by: Gustavo A. R. Silva --- drivers/dma/ipu/ipu_idmac.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/dma/ipu/ipu_idmac.c b/drivers/dma/ipu/ipu_idmac.c index 104ad420abbeef..baab1ca9f6214d 100644 --- a/drivers/dma/ipu/ipu_idmac.c +++ b/drivers/dma/ipu/ipu_idmac.c @@ -618,6 +618,7 @@ static int ipu_enable_channel(struct idmac *idmac, struct idmac_channel *ichan) case IDMAC_SDC_1: case IDMAC_IC_7: ipu_channel_set_priority(ipu, channel, true); + break; default: break; } @@ -978,6 +979,7 @@ static int ipu_init_channel(struct idmac *idmac, struct idmac_channel *ichan) case IDMAC_SDC_0: case IDMAC_SDC_1: n_desc = 4; + break; default: break; } From d4e81342ea2399cf92e98ea5c0d025bcf8a3d065 Mon Sep 17 00:00:00 2001 From: "Gustavo A. R. Silva" Date: Tue, 13 Jul 2021 14:43:09 -0500 Subject: [PATCH 310/714] s390: Fix fall-through warnings for Clang Fix the following fallthrough warnings: drivers/s390/net/ctcm_fsms.c:1457:2: warning: unannotated fall-through between switch labels [-Wimplicit-fallthrough] drivers/s390/net/qeth_l3_main.c:437:3: warning: unannotated fall-through between switch labels [-Wimplicit-fallthrough] drivers/s390/char/tape_char.c:374:4: warning: unannotated fall-through between switch labels [-Wimplicit-fallthrough] arch/s390/kernel/uprobes.c:129:2: warning: unannotated fall-through between switch labels [-Wimplicit-fallthrough] Reported-by: Nathan Chancellor Signed-off-by: Gustavo A. R. Silva --- arch/s390/kernel/uprobes.c | 1 + drivers/s390/char/tape_char.c | 2 -- drivers/s390/net/ctcm_fsms.c | 1 + drivers/s390/net/qeth_l3_main.c | 1 + 4 files changed, 3 insertions(+), 2 deletions(-) diff --git a/arch/s390/kernel/uprobes.c b/arch/s390/kernel/uprobes.c index bbf8622bbf5d7e..bd3ef121c37980 100644 --- a/arch/s390/kernel/uprobes.c +++ b/arch/s390/kernel/uprobes.c @@ -126,6 +126,7 @@ int arch_uprobe_exception_notify(struct notifier_block *self, unsigned long val, case DIE_SSTEP: if (uprobe_post_sstep_notifier(regs)) return NOTIFY_STOP; + break; default: break; } diff --git a/drivers/s390/char/tape_char.c b/drivers/s390/char/tape_char.c index 8abb4292330718..cc8237afeffa0d 100644 --- a/drivers/s390/char/tape_char.c +++ b/drivers/s390/char/tape_char.c @@ -371,8 +371,6 @@ __tapechar_ioctl(struct tape_device *device, case MTSEEK: if (device->required_tapemarks) tape_std_terminate_write(device); - default: - ; } rc = tape_mtop(device, op.mt_op, op.mt_count); diff --git a/drivers/s390/net/ctcm_fsms.c b/drivers/s390/net/ctcm_fsms.c index b341075397d944..377e3689d1d438 100644 --- a/drivers/s390/net/ctcm_fsms.c +++ b/drivers/s390/net/ctcm_fsms.c @@ -1454,6 +1454,7 @@ static void ctcmpc_chx_rx(fsm_instance *fi, int event, void *arg) get_ccwdev_lock(ch->cdev), saveflags); if (rc != 0) ctcm_ccw_check_rc(ch, rc, "normal RX"); + break; default: break; } diff --git a/drivers/s390/net/qeth_l3_main.c b/drivers/s390/net/qeth_l3_main.c index d308ff744a295a..f0d6f205c53cda 100644 --- a/drivers/s390/net/qeth_l3_main.c +++ b/drivers/s390/net/qeth_l3_main.c @@ -434,6 +434,7 @@ static int qeth_l3_correct_routing_type(struct qeth_card *card, if (qeth_is_ipafunc_supported(card, prot, IPA_OSA_MC_ROUTER)) return 0; + goto out_inval; default: goto out_inval; } From d6a48a476a5de8bdfbdd6040aa5a48955fc49e44 Mon Sep 17 00:00:00 2001 From: "Gustavo A. R. Silva" Date: Tue, 13 Jul 2021 14:48:28 -0500 Subject: [PATCH 311/714] dmaengine: ti: k3-udma: Fix fall-through warning for Clang Fix the following fallthrough warning: drivers/dma/ti/k3-udma.c:4951:2: warning: unannotated fall-through between switch labels [-Wimplicit-fallthrough] Reported-by: Nathan Chancellor Signed-off-by: Gustavo A. R. Silva --- drivers/dma/ti/k3-udma.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/dma/ti/k3-udma.c b/drivers/dma/ti/k3-udma.c index 96ad21869ba720..a35858610780cf 100644 --- a/drivers/dma/ti/k3-udma.c +++ b/drivers/dma/ti/k3-udma.c @@ -4948,6 +4948,7 @@ static int setup_resources(struct udma_dev *ud) ud->tchan_cnt), ud->rchan_cnt - bitmap_weight(ud->rchan_map, ud->rchan_cnt)); + break; default: break; } From b51883d47d37c3f3fa80d6bcd8cc3a57d5b95130 Mon Sep 17 00:00:00 2001 From: "Gustavo A. R. Silva" Date: Tue, 13 Jul 2021 14:50:47 -0500 Subject: [PATCH 312/714] power: supply: Fix fall-through warnings for Clang Fix the following fallthrough warnings: drivers/power/supply/ab8500_fg.c:1730:2: warning: unannotated fall-through between switch labels [-Wimplicit-fallthrough] drivers/power/supply/abx500_chargalg.c:1155:3: warning: unannotated fall-through between switch labels [-Wimplicit-fallthrough] Reported-by: Nathan Chancellor Signed-off-by: Gustavo A. R. Silva --- drivers/power/supply/ab8500_fg.c | 2 ++ drivers/power/supply/abx500_chargalg.c | 1 + 2 files changed, 3 insertions(+) diff --git a/drivers/power/supply/ab8500_fg.c b/drivers/power/supply/ab8500_fg.c index 3d45ed0157c6e7..a6ebdb269fdd6a 100644 --- a/drivers/power/supply/ab8500_fg.c +++ b/drivers/power/supply/ab8500_fg.c @@ -1728,6 +1728,7 @@ static void ab8500_fg_algorithm_calibrate(struct ab8500_fg *di) break; case AB8500_FG_CALIB_WAIT: dev_dbg(di->dev, "Calibration WFI\n"); + break; default: break; } @@ -2224,6 +2225,7 @@ static int ab8500_fg_get_ext_psy_data(struct device *dev, void *data) queue_work(di->fg_wq, &di->fg_work); break; } + break; default: break; } diff --git a/drivers/power/supply/abx500_chargalg.c b/drivers/power/supply/abx500_chargalg.c index a17849bfacbff9..b72826cf6794ba 100644 --- a/drivers/power/supply/abx500_chargalg.c +++ b/drivers/power/supply/abx500_chargalg.c @@ -1150,6 +1150,7 @@ static int abx500_chargalg_get_ext_psy_data(struct device *dev, void *data) default: break; } + break; default: break; } From 2feeb52859fc1ab94cd35b61ada3a6ac4ff24243 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ville=20Syrj=C3=A4l=C3=A4?= Date: Wed, 30 Jun 2021 19:44:13 +0300 Subject: [PATCH 313/714] drm/i915/gt: Fix -EDEADLK handling regression MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The conversion to ww mutexes failed to address the fence code which already returns -EDEADLK when we run out of fences. Ww mutexes on the other hand treat -EDEADLK as an internal errno value indicating a need to restart the operation due to a deadlock. So now when the fence code returns -EDEADLK the higher level code erroneously restarts everything instead of returning the error to userspace as is expected. To remedy this let's switch the fence code to use a different errno value for this. -ENOBUFS seems like a semi-reasonable unique choice. Apart from igt the only user of this I could find is sna, and even there all we do is dump the current fence registers from debugfs into the X server log. So no user visible functionality is affected. If we really cared about preserving this we could of course convert back to -EDEADLK higher up, but doesn't seem like that's worth the hassle here. Not quite sure which commit specifically broke this, but I'll just attribute it to the general gem ww mutex work. Cc: stable@vger.kernel.org Cc: Maarten Lankhorst Cc: Thomas Hellström Testcase: igt/gem_pread/exhaustion Testcase: igt/gem_pwrite/basic-exhaustion Testcase: igt/gem_fenced_exec_thrash/too-many-fences Fixes: 80f0b679d6f0 ("drm/i915: Add an implementation for i915_gem_ww_ctx locking, v2.") Signed-off-by: Ville Syrjälä Link: https://patchwork.freedesktop.org/patch/msgid/20210630164413.25481-1-ville.syrjala@linux.intel.com Reviewed-by: Maarten Lankhorst (cherry picked from commit 78d2ad7eb4e1f0e9cd5d79788446b6092c21d3e0) Signed-off-by: Rodrigo Vivi --- drivers/gpu/drm/i915/gt/intel_ggtt_fencing.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/i915/gt/intel_ggtt_fencing.c b/drivers/gpu/drm/i915/gt/intel_ggtt_fencing.c index cac7f3f4464287..f8948de7203601 100644 --- a/drivers/gpu/drm/i915/gt/intel_ggtt_fencing.c +++ b/drivers/gpu/drm/i915/gt/intel_ggtt_fencing.c @@ -348,7 +348,7 @@ static struct i915_fence_reg *fence_find(struct i915_ggtt *ggtt) if (intel_has_pending_fb_unpin(ggtt->vm.i915)) return ERR_PTR(-EAGAIN); - return ERR_PTR(-EDEADLK); + return ERR_PTR(-ENOBUFS); } int __i915_vma_pin_fence(struct i915_vma *vma) From 4796372e7c117b84bfd929526f48e23c79030dcd Mon Sep 17 00:00:00 2001 From: "Gustavo A. R. Silva" Date: Tue, 13 Jul 2021 14:58:18 -0500 Subject: [PATCH 314/714] ASoC: Mediatek: MT8183: Fix fall-through warning for Clang Fix the following fallthrough warning: sound/soc/mediatek/mt8183/mt8183-dai-adda.c:342:2: warning: unannotated fall-through between switch labels [-Wimplicit-fallthrough] Reported-by: Nathan Chancellor Signed-off-by: Gustavo A. R. Silva --- sound/soc/mediatek/mt8183/mt8183-dai-adda.c | 1 + 1 file changed, 1 insertion(+) diff --git a/sound/soc/mediatek/mt8183/mt8183-dai-adda.c b/sound/soc/mediatek/mt8183/mt8183-dai-adda.c index 2b758a18c2ea5f..5b8a274419ed14 100644 --- a/sound/soc/mediatek/mt8183/mt8183-dai-adda.c +++ b/sound/soc/mediatek/mt8183/mt8183-dai-adda.c @@ -341,6 +341,7 @@ static int set_mtkaif_rx(struct mtk_base_afe *afe) case MT8183_MTKAIF_PROTOCOL_1: regmap_write(afe->regmap, AFE_AUD_PAD_TOP, 0x31); regmap_write(afe->regmap, AFE_ADDA_MTKAIF_CFG0, 0x0); + break; default: break; } From bc431d2153cc290573531601b5004babe7011568 Mon Sep 17 00:00:00 2001 From: "Gustavo A. R. Silva" Date: Tue, 13 Jul 2021 13:38:58 -0500 Subject: [PATCH 315/714] MIPS: Fix fall-through warnings for Clang Fix the following fallthrough warnings: arch/mips/mm/tlbex.c:1386:2: warning: unannotated fall-through between switch labels [-Wimplicit-fallthrough] arch/mips/mm/tlbex.c:2173:3: error: unannotated fall-through between switch labels [-Werror,-Wimplicit-fallthrough] Reported-by: kernel test robot Link: https://lore.kernel.org/lkml/60edca25.k00ut905IFBjPyt5%25lkp@intel.com/ Signed-off-by: Gustavo A. R. Silva --- arch/mips/mm/tlbex.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/arch/mips/mm/tlbex.c b/arch/mips/mm/tlbex.c index cd4afcdf372550..9adad24c2e65e5 100644 --- a/arch/mips/mm/tlbex.c +++ b/arch/mips/mm/tlbex.c @@ -1383,6 +1383,7 @@ static void build_r4000_tlb_refill_handler(void) switch (boot_cpu_type()) { default: if (sizeof(long) == 4) { + fallthrough; case CPU_LOONGSON2EF: /* Loongson2 ebase is different than r4k, we have more space */ if ((p - tlb_handler) > 64) @@ -2169,6 +2170,7 @@ static void build_r4000_tlb_load_handler(void) default: if (cpu_has_mips_r2_exec_hazard) { uasm_i_ehb(&p); + fallthrough; case CPU_CAVIUM_OCTEON: case CPU_CAVIUM_OCTEON_PLUS: From cf6678aec2530701f8e13b7551d131599c33e5ca Mon Sep 17 00:00:00 2001 From: "Gustavo A. R. Silva" Date: Tue, 13 Jul 2021 13:41:16 -0500 Subject: [PATCH 316/714] MIPS: Fix unreachable code issue Fix the following warning (mips-randconfig): arch/mips/include/asm/fpu.h:79:3: warning: fallthrough annotation in unreachable code [-Wimplicit-fallthrough] Originally, the /* fallthrough */ comment was introduced by commit: 597ce1723e0f ("MIPS: Support for 64-bit FP with O32 binaries") and it was wrongly replaced with fallthrough; by commit: c9b029903466 ("MIPS: Use fallthrough for arch/mips") As the original comment is actually useful, fix this issue by removing unreachable fallthrough; statement and place the original /* fallthrough */ comment back. Fixes: c9b029903466 ("MIPS: Use fallthrough for arch/mips") Reported-by: kernel test robot Link: https://lore.kernel.org/lkml/60edca25.k00ut905IFBjPyt5%25lkp@intel.com/ Signed-off-by: Gustavo A. R. Silva --- arch/mips/include/asm/fpu.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/mips/include/asm/fpu.h b/arch/mips/include/asm/fpu.h index 08f9dd6903b712..86310d6e10352c 100644 --- a/arch/mips/include/asm/fpu.h +++ b/arch/mips/include/asm/fpu.h @@ -76,7 +76,7 @@ static inline int __enable_fpu(enum fpu_mode mode) /* we only have a 32-bit FPU */ return SIGFPE; #endif - fallthrough; + /* fallthrough */ case FPU_32BIT: if (cpu_has_fre) { /* clear FRE */ From bcb9928a155444dbd212473e60241ca0a7f641e1 Mon Sep 17 00:00:00 2001 From: Vladimir Oltean Date: Tue, 13 Jul 2021 12:40:21 +0300 Subject: [PATCH 317/714] net: dsa: properly check for the bridge_leave methods in dsa_switch_bridge_leave() This was not caught because there is no switch driver which implements the .port_bridge_join but not .port_bridge_leave method, but it should nonetheless be fixed, as in certain conditions (driver development) it might lead to NULL pointer dereference. Fixes: f66a6a69f97a ("net: dsa: permit cross-chip bridging between all trees in the system") Signed-off-by: Vladimir Oltean Signed-off-by: David S. Miller --- net/dsa/switch.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/net/dsa/switch.c b/net/dsa/switch.c index 24845514598201..5ece05dfd8f2c7 100644 --- a/net/dsa/switch.c +++ b/net/dsa/switch.c @@ -113,11 +113,11 @@ static int dsa_switch_bridge_leave(struct dsa_switch *ds, int err, port; if (dst->index == info->tree_index && ds->index == info->sw_index && - ds->ops->port_bridge_join) + ds->ops->port_bridge_leave) ds->ops->port_bridge_leave(ds, info->port, info->br); if ((dst->index != info->tree_index || ds->index != info->sw_index) && - ds->ops->crosschip_bridge_join) + ds->ops->crosschip_bridge_leave) ds->ops->crosschip_bridge_leave(ds, info->tree_index, info->sw_index, info->port, info->br); From 479857a9bcc86e808216daae8ca1c9213b9e117a Mon Sep 17 00:00:00 2001 From: "Gustavo A. R. Silva" Date: Tue, 13 Jul 2021 19:19:03 -0500 Subject: [PATCH 318/714] powerpc/powernv: Fix fall-through warning for Clang Fix the following fallthrough warnings (powernv_defconfig and powerpc64): drivers/char/powernv-op-panel.c:78:2: warning: unannotated fall-through between switch labels [-Wimplicit-fallthrough] Reported-by: Nathan Chancellor Signed-off-by: Gustavo A. R. Silva --- drivers/char/powernv-op-panel.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/char/powernv-op-panel.c b/drivers/char/powernv-op-panel.c index 027484ecfb0de8..3c99696b145ee6 100644 --- a/drivers/char/powernv-op-panel.c +++ b/drivers/char/powernv-op-panel.c @@ -75,6 +75,7 @@ static int __op_panel_update_display(void) rc); break; } + break; case OPAL_SUCCESS: break; default: From ce36c94214f0d534aad4adb4219cc046270948fb Mon Sep 17 00:00:00 2001 From: Marek Szyprowski Date: Mon, 5 Jul 2021 08:56:57 +0200 Subject: [PATCH 319/714] iommu/qcom: Revert "iommu/arm: Cleanup resources in case of probe error path" QCOM IOMMU driver calls bus_set_iommu() for every IOMMU device controller, what fails for the second and latter IOMMU devices. This is intended and must be not fatal to the driver registration process. Also the cleanup path should take care of the runtime PM state, what is missing in the current patch. Revert relevant changes to the QCOM IOMMU driver until a proper fix is prepared. This partially reverts commit 249c9dc6aa0db74a0f7908efd04acf774e19b155. Fixes: 249c9dc6aa0d ("iommu/arm: Cleanup resources in case of probe error path") Suggested-by: Will Deacon Signed-off-by: Marek Szyprowski Acked-by: Will Deacon Link: https://lore.kernel.org/r/20210705065657.30356-1-m.szyprowski@samsung.com Signed-off-by: Joerg Roedel --- drivers/iommu/arm/arm-smmu/qcom_iommu.c | 13 ++----------- 1 file changed, 2 insertions(+), 11 deletions(-) diff --git a/drivers/iommu/arm/arm-smmu/qcom_iommu.c b/drivers/iommu/arm/arm-smmu/qcom_iommu.c index 25ed444ff94d07..021cf8f65ffcd6 100644 --- a/drivers/iommu/arm/arm-smmu/qcom_iommu.c +++ b/drivers/iommu/arm/arm-smmu/qcom_iommu.c @@ -849,12 +849,10 @@ static int qcom_iommu_device_probe(struct platform_device *pdev) ret = iommu_device_register(&qcom_iommu->iommu, &qcom_iommu_ops, dev); if (ret) { dev_err(dev, "Failed to register iommu\n"); - goto err_sysfs_remove; + return ret; } - ret = bus_set_iommu(&platform_bus_type, &qcom_iommu_ops); - if (ret) - goto err_unregister_device; + bus_set_iommu(&platform_bus_type, &qcom_iommu_ops); if (qcom_iommu->local_base) { pm_runtime_get_sync(dev); @@ -863,13 +861,6 @@ static int qcom_iommu_device_probe(struct platform_device *pdev) } return 0; - -err_unregister_device: - iommu_device_unregister(&qcom_iommu->iommu); - -err_sysfs_remove: - iommu_device_sysfs_remove(&qcom_iommu->iommu); - return ret; } static int qcom_iommu_device_remove(struct platform_device *pdev) From 37764b952e1b39053defc7ebe5dcd8c4e3e78de9 Mon Sep 17 00:00:00 2001 From: Sanjay Kumar Date: Mon, 12 Jul 2021 15:13:15 +0800 Subject: [PATCH 320/714] iommu/vt-d: Global devTLB flush when present context entry changed This fixes a bug in context cache clear operation. The code was not following the correct invalidation flow. A global device TLB invalidation should be added after the IOTLB invalidation. At the same time, it uses the domain ID from the context entry. But in scalable mode, the domain ID is in PASID table entry, not context entry. Fixes: 7373a8cc38197 ("iommu/vt-d: Setup context and enable RID2PASID support") Cc: stable@vger.kernel.org # v5.0+ Signed-off-by: Sanjay Kumar Signed-off-by: Lu Baolu Link: https://lore.kernel.org/r/20210712071315.3416543-1-baolu.lu@linux.intel.com Signed-off-by: Joerg Roedel --- drivers/iommu/intel/iommu.c | 31 ++++++++++++++++++++++--------- 1 file changed, 22 insertions(+), 9 deletions(-) diff --git a/drivers/iommu/intel/iommu.c b/drivers/iommu/intel/iommu.c index a6a07d985709f4..57270290d62b2f 100644 --- a/drivers/iommu/intel/iommu.c +++ b/drivers/iommu/intel/iommu.c @@ -2429,10 +2429,11 @@ __domain_mapping(struct dmar_domain *domain, unsigned long iov_pfn, return 0; } -static void domain_context_clear_one(struct intel_iommu *iommu, u8 bus, u8 devfn) +static void domain_context_clear_one(struct device_domain_info *info, u8 bus, u8 devfn) { - unsigned long flags; + struct intel_iommu *iommu = info->iommu; struct context_entry *context; + unsigned long flags; u16 did_old; if (!iommu) @@ -2444,7 +2445,16 @@ static void domain_context_clear_one(struct intel_iommu *iommu, u8 bus, u8 devfn spin_unlock_irqrestore(&iommu->lock, flags); return; } - did_old = context_domain_id(context); + + if (sm_supported(iommu)) { + if (hw_pass_through && domain_type_is_si(info->domain)) + did_old = FLPT_DEFAULT_DID; + else + did_old = info->domain->iommu_did[iommu->seq_id]; + } else { + did_old = context_domain_id(context); + } + context_clear_entry(context); __iommu_flush_cache(iommu, context, sizeof(*context)); spin_unlock_irqrestore(&iommu->lock, flags); @@ -2462,6 +2472,8 @@ static void domain_context_clear_one(struct intel_iommu *iommu, u8 bus, u8 devfn 0, 0, DMA_TLB_DSI_FLUSH); + + __iommu_flush_dev_iotlb(info, 0, MAX_AGAW_PFN_WIDTH); } static inline void unlink_domain_info(struct device_domain_info *info) @@ -4425,9 +4437,9 @@ int __init intel_iommu_init(void) static int domain_context_clear_one_cb(struct pci_dev *pdev, u16 alias, void *opaque) { - struct intel_iommu *iommu = opaque; + struct device_domain_info *info = opaque; - domain_context_clear_one(iommu, PCI_BUS_NUM(alias), alias & 0xff); + domain_context_clear_one(info, PCI_BUS_NUM(alias), alias & 0xff); return 0; } @@ -4437,12 +4449,13 @@ static int domain_context_clear_one_cb(struct pci_dev *pdev, u16 alias, void *op * devices, unbinding the driver from any one of them will possibly leave * the others unable to operate. */ -static void domain_context_clear(struct intel_iommu *iommu, struct device *dev) +static void domain_context_clear(struct device_domain_info *info) { - if (!iommu || !dev || !dev_is_pci(dev)) + if (!info->iommu || !info->dev || !dev_is_pci(info->dev)) return; - pci_for_each_dma_alias(to_pci_dev(dev), &domain_context_clear_one_cb, iommu); + pci_for_each_dma_alias(to_pci_dev(info->dev), + &domain_context_clear_one_cb, info); } static void __dmar_remove_one_dev_info(struct device_domain_info *info) @@ -4466,7 +4479,7 @@ static void __dmar_remove_one_dev_info(struct device_domain_info *info) iommu_disable_dev_iotlb(info); if (!dev_is_real_dma_subdevice(info->dev)) - domain_context_clear(iommu, info->dev); + domain_context_clear(info); intel_pasid_free_table(info->dev); } From 474dd1c6506411752a9b2f2233eec11f1733a099 Mon Sep 17 00:00:00 2001 From: Lu Baolu Date: Mon, 12 Jul 2021 15:17:12 +0800 Subject: [PATCH 321/714] iommu/vt-d: Fix clearing real DMA device's scalable-mode context entries The commit 2b0140c69637e ("iommu/vt-d: Use pci_real_dma_dev() for mapping") fixes an issue of "sub-device is removed where the context entry is cleared for all aliases". But this commit didn't consider the PASID entry and PASID table in VT-d scalable mode. This fix increases the coverage of scalable mode. Suggested-by: Sanjay Kumar Fixes: 8038bdb855331 ("iommu/vt-d: Only clear real DMA device's context entries") Fixes: 2b0140c69637e ("iommu/vt-d: Use pci_real_dma_dev() for mapping") Cc: stable@vger.kernel.org # v5.6+ Cc: Jon Derrick Signed-off-by: Lu Baolu Link: https://lore.kernel.org/r/20210712071712.3416949-1-baolu.lu@linux.intel.com Signed-off-by: Joerg Roedel --- drivers/iommu/intel/iommu.c | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/drivers/iommu/intel/iommu.c b/drivers/iommu/intel/iommu.c index 57270290d62b2f..dd22fc7d517645 100644 --- a/drivers/iommu/intel/iommu.c +++ b/drivers/iommu/intel/iommu.c @@ -4472,14 +4472,13 @@ static void __dmar_remove_one_dev_info(struct device_domain_info *info) iommu = info->iommu; domain = info->domain; - if (info->dev) { + if (info->dev && !dev_is_real_dma_subdevice(info->dev)) { if (dev_is_pci(info->dev) && sm_supported(iommu)) intel_pasid_tear_down_entry(iommu, info->dev, PASID_RID2PASID, false); iommu_disable_dev_iotlb(info); - if (!dev_is_real_dma_subdevice(info->dev)) - domain_context_clear(info); + domain_context_clear(info); intel_pasid_free_table(info->dev); } From c987b65a574fad8b598d6f58f010d8f630327428 Mon Sep 17 00:00:00 2001 From: Benjamin Gaignard Date: Mon, 12 Jul 2021 12:12:32 +0200 Subject: [PATCH 322/714] iommu/rockchip: Fix physical address decoding Restore bits 39 to 32 at correct position. It reverses the operation done in rk_dma_addr_dte_v2(). Fixes: c55356c534aa ("iommu: rockchip: Add support for iommu v2") Reported-by: Dan Carpenter Signed-off-by: Benjamin Gaignard Link: https://lore.kernel.org/r/20210712101232.318589-1-benjamin.gaignard@collabora.com Signed-off-by: Joerg Roedel --- drivers/iommu/rockchip-iommu.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/drivers/iommu/rockchip-iommu.c b/drivers/iommu/rockchip-iommu.c index 94b9d8e5b9a40c..9febfb7f3025b2 100644 --- a/drivers/iommu/rockchip-iommu.c +++ b/drivers/iommu/rockchip-iommu.c @@ -544,12 +544,14 @@ static inline u32 rk_dma_addr_dte(dma_addr_t dt_dma) } #define DT_HI_MASK GENMASK_ULL(39, 32) +#define DTE_BASE_HI_MASK GENMASK(11, 4) #define DT_SHIFT 28 static inline phys_addr_t rk_dte_addr_phys_v2(u32 addr) { - return (phys_addr_t)(addr & RK_DTE_PT_ADDRESS_MASK) | - ((addr & DT_HI_MASK) << DT_SHIFT); + u64 addr64 = addr; + return (phys_addr_t)(addr64 & RK_DTE_PT_ADDRESS_MASK) | + ((addr64 & DTE_BASE_HI_MASK) << DT_SHIFT); } static inline u32 rk_dma_addr_dte_v2(dma_addr_t dt_dma) From aa21548e34c19c12e924c736f3fd9e6a4d0f5419 Mon Sep 17 00:00:00 2001 From: Sathya Prakash M R Date: Mon, 12 Jul 2021 15:16:20 -0500 Subject: [PATCH 323/714] ASoC: SOF: Intel: Update ADL descriptor to use ACPI power states The ADL descriptor was missing an ACPI power setting, causing the DSP to enter D3 even with a D0i1-compatible wake-on-voice/hotwording capture stream. Fixes: 4ad03f894b3c ('ASoC: SOF: Intel: Update ADL P to use its own descriptor') Reviewed-by: Ranjani Sridharan Signed-off-by: Sathya Prakash M R Signed-off-by: Pierre-Louis Bossart Link: https://lore.kernel.org/r/20210712201620.44311-1-pierre-louis.bossart@linux.intel.com Signed-off-by: Mark Brown --- sound/soc/sof/intel/pci-tgl.c | 1 + 1 file changed, 1 insertion(+) diff --git a/sound/soc/sof/intel/pci-tgl.c b/sound/soc/sof/intel/pci-tgl.c index a00262184efab4..d04ce84fe7cc2a 100644 --- a/sound/soc/sof/intel/pci-tgl.c +++ b/sound/soc/sof/intel/pci-tgl.c @@ -89,6 +89,7 @@ static const struct sof_dev_desc adls_desc = { static const struct sof_dev_desc adl_desc = { .machines = snd_soc_acpi_intel_adl_machines, .alt_machines = snd_soc_acpi_intel_adl_sdw_machines, + .use_acpi_target_states = true, .resindex_lpe_base = 0, .resindex_pcicfg_base = -1, .resindex_imr_base = -1, From 9431f8df233f808baa5fcc62b520cc6503fdf022 Mon Sep 17 00:00:00 2001 From: Srinivas Kandagatla Date: Tue, 13 Jul 2021 15:04:17 +0100 Subject: [PATCH 324/714] ASoC: codecs: wcd938x: make sdw dependency explicit in Kconfig currenlty wcd938x has only soundwire interface and depends on symbols from wcd938x soundwire module, so make this dependency explicit in Kconfig Without this one of the randconfig endup setting CONFIG_SND_SOC_WCD938X=y CONFIG_SND_SOC_WCD938X_SDW=m resulting in some undefined reference to wcd938x_sdw* symbols. Reported-by: kernel test robot Fixes: 045442228868 ("ASoC: codecs: wcd938x: add audio routing and Kconfig") Signed-off-by: Srinivas Kandagatla Link: https://lore.kernel.org/r/20210713140417.23693-1-srinivas.kandagatla@linaro.org Signed-off-by: Mark Brown --- sound/soc/codecs/Kconfig | 1 + 1 file changed, 1 insertion(+) diff --git a/sound/soc/codecs/Kconfig b/sound/soc/codecs/Kconfig index 3a42c461141482..032c87637f6389 100644 --- a/sound/soc/codecs/Kconfig +++ b/sound/soc/codecs/Kconfig @@ -1557,6 +1557,7 @@ config SND_SOC_WCD934X Qualcomm SoCs like SDM845. config SND_SOC_WCD938X + depends on SND_SOC_WCD938X_SDW tristate config SND_SOC_WCD938X_SDW From c934fec1c32840224fd975544c347823962193b2 Mon Sep 17 00:00:00 2001 From: Mason Zhang Date: Tue, 13 Jul 2021 19:42:48 +0800 Subject: [PATCH 325/714] spi: mediatek: move devm_spi_register_master position This patch move devm_spi_register_master to the end of mtk_spi_probe. If slaves call spi_sync in there probe function, master should have probe done. Signed-off-by: Mason Zhang Link: https://lore.kernel.org/r/20210713114247.1536-1-mason.zhang@mediatek.com Signed-off-by: Mark Brown --- drivers/spi/spi-mt65xx.c | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/drivers/spi/spi-mt65xx.c b/drivers/spi/spi-mt65xx.c index 8d5fa7f1e5069a..68dca8ceb3ad7d 100644 --- a/drivers/spi/spi-mt65xx.c +++ b/drivers/spi/spi-mt65xx.c @@ -803,12 +803,6 @@ static int mtk_spi_probe(struct platform_device *pdev) pm_runtime_enable(&pdev->dev); - ret = devm_spi_register_master(&pdev->dev, master); - if (ret) { - dev_err(&pdev->dev, "failed to register master (%d)\n", ret); - goto err_disable_runtime_pm; - } - if (mdata->dev_comp->need_pad_sel) { if (mdata->pad_num != master->num_chipselect) { dev_err(&pdev->dev, @@ -848,6 +842,12 @@ static int mtk_spi_probe(struct platform_device *pdev) dev_notice(&pdev->dev, "SPI dma_set_mask(%d) failed, ret:%d\n", addr_bits, ret); + ret = devm_spi_register_master(&pdev->dev, master); + if (ret) { + dev_err(&pdev->dev, "failed to register master (%d)\n", ret); + goto err_disable_runtime_pm; + } + return 0; err_disable_runtime_pm: From 0abb33bfca0fb74df76aac03e90ce685016ef7be Mon Sep 17 00:00:00 2001 From: Matthew Auld Date: Tue, 13 Jul 2021 14:04:31 +0100 Subject: [PATCH 326/714] drm/i915/gtt: drop the page table optimisation We skip filling out the pt with scratch entries if the va range covers the entire pt, since we later have to fill it with the PTEs for the object pages anyway. However this might leave open a small window where the PTEs don't point to anything valid for the HW to consume. When for example using 2M GTT pages this fill_px() showed up as being quite significant in perf measurements, and ends up being completely wasted since we ignore the pt and just use the pde directly. Anyway, currently we have our PTE construction split between alloc and insert, which is probably slightly iffy nowadays, since the alloc doesn't actually allocate anything anymore, instead it just sets up the page directories and points the PTEs at the scratch page. Later when we do the insert step we re-program the PTEs again. Better might be to squash the alloc and insert into a single step, then bringing back this optimisation(along with some others) should be possible. Fixes: 14826673247e ("drm/i915: Only initialize partially filled pagetables") Signed-off-by: Matthew Auld Cc: Jon Bloomfield Cc: Chris Wilson Cc: Daniel Vetter Cc: # v4.15+ Reviewed-by: Daniel Vetter Link: https://patchwork.freedesktop.org/patch/msgid/20210713130431.2392740-1-matthew.auld@intel.com (cherry picked from commit 8f88ca76b3942d82e2c1cea8735ec368d89ecc15) Signed-off-by: Rodrigo Vivi --- drivers/gpu/drm/i915/gt/gen8_ppgtt.c | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) diff --git a/drivers/gpu/drm/i915/gt/gen8_ppgtt.c b/drivers/gpu/drm/i915/gt/gen8_ppgtt.c index 21c8b7350b7a3c..da4f5eb43ac213 100644 --- a/drivers/gpu/drm/i915/gt/gen8_ppgtt.c +++ b/drivers/gpu/drm/i915/gt/gen8_ppgtt.c @@ -303,10 +303,7 @@ static void __gen8_ppgtt_alloc(struct i915_address_space * const vm, __i915_gem_object_pin_pages(pt->base); i915_gem_object_make_unshrinkable(pt->base); - if (lvl || - gen8_pt_count(*start, end) < I915_PDES || - intel_vgpu_active(vm->i915)) - fill_px(pt, vm->scratch[lvl]->encode); + fill_px(pt, vm->scratch[lvl]->encode); spin_lock(&pd->lock); if (likely(!pd->entry[idx])) { From 49afa7f6c714ab3f5cb2a4c835f7f9dddc0bb2c1 Mon Sep 17 00:00:00 2001 From: Jin Yao Date: Thu, 8 Jul 2021 09:36:58 +0800 Subject: [PATCH 327/714] perf pmu: Skip invalid hybrid pmu On hybrid platform, such as Alderlake, if atom CPUs are offlined, the kernel still exports the sysfs path '/sys/devices/cpu_atom/' for 'cpu_atom' pmu but the file '/sys/devices/cpu_atom/cpus' is empty, which indicates this is an invalid pmu. Need to check and skip the invalid hybrid pmu. Before: # perf list ... branch-instructions OR cpu_atom/branch-instructions/ [Kernel PMU event] branch-instructions OR cpu_core/branch-instructions/ [Kernel PMU event] branch-misses OR cpu_atom/branch-misses/ [Kernel PMU event] branch-misses OR cpu_core/branch-misses/ [Kernel PMU event] bus-cycles OR cpu_atom/bus-cycles/ [Kernel PMU event] bus-cycles OR cpu_core/bus-cycles/ [Kernel PMU event] ... The cpu_atom events are still displayed even if atom CPUs are offlined. After: # perf list ... branch-instructions OR cpu_core/branch-instructions/ [Kernel PMU event] branch-misses OR cpu_core/branch-misses/ [Kernel PMU event] bus-cycles OR cpu_core/bus-cycles/ [Kernel PMU event] ... Now only cpu_core events are displayed. Signed-off-by: Jin Yao Acked-by: Jiri Olsa Cc: Alexander Shishkin Cc: Andi Kleen Cc: Jin Yao Cc: Kan Liang Cc: Peter Zijlstra Link: http://lore.kernel.org/lkml/20210708013701.20347-2-yao.jin@linux.intel.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/pmu.c | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) diff --git a/tools/perf/util/pmu.c b/tools/perf/util/pmu.c index 44b90d638ad5f1..a1bd7007a8b4d6 100644 --- a/tools/perf/util/pmu.c +++ b/tools/perf/util/pmu.c @@ -950,6 +950,13 @@ static struct perf_pmu *pmu_lookup(const char *name) LIST_HEAD(format); LIST_HEAD(aliases); __u32 type; + bool is_hybrid = perf_pmu__hybrid_mounted(name); + + /* + * Check pmu name for hybrid and the pmu may be invalid in sysfs + */ + if (!strncmp(name, "cpu_", 4) && !is_hybrid) + return NULL; /* * The pmu data we store & need consists of the pmu @@ -978,7 +985,7 @@ static struct perf_pmu *pmu_lookup(const char *name) pmu->is_uncore = pmu_is_uncore(name); if (pmu->is_uncore) pmu->id = pmu_id(name); - pmu->is_hybrid = perf_pmu__hybrid_mounted(name); + pmu->is_hybrid = is_hybrid; pmu->max_precise = pmu_max_precise(name); pmu_add_cpu_aliases(&aliases, pmu); pmu_add_sys_aliases(&aliases, pmu); From 490e9a8fb4a3d74602668ef2e4cc29608e9b00a6 Mon Sep 17 00:00:00 2001 From: Jin Yao Date: Thu, 8 Jul 2021 09:36:59 +0800 Subject: [PATCH 328/714] perf tests: Fix 'Parse event definition strings' on core-only system If the atom CPUs are offlined, the 'cpu_atom' is not valid. We don't need the test case for 'cpu_atom'. Signed-off-by: Jin Yao Acked-by: Jiri Olsa Cc: Alexander Shishkin Cc: Andi Kleen Cc: Jin Yao Cc: Kan Liang Cc: Peter Zijlstra Link: http://lore.kernel.org/lkml/20210708013701.20347-3-yao.jin@linux.intel.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/tests/parse-events.c | 16 ++++++++++------ 1 file changed, 10 insertions(+), 6 deletions(-) diff --git a/tools/perf/tests/parse-events.c b/tools/perf/tests/parse-events.c index 56a7b6a14195af..8d48667392557f 100644 --- a/tools/perf/tests/parse-events.c +++ b/tools/perf/tests/parse-events.c @@ -6,6 +6,7 @@ #include "tests.h" #include "debug.h" #include "pmu.h" +#include "pmu-hybrid.h" #include #include #include @@ -1596,6 +1597,13 @@ static int test__hybrid_raw1(struct evlist *evlist) { struct evsel *evsel = evlist__first(evlist); + if (!perf_pmu__hybrid_mounted("cpu_atom")) { + TEST_ASSERT_VAL("wrong number of entries", 1 == evlist->core.nr_entries); + TEST_ASSERT_VAL("wrong type", PERF_TYPE_RAW == evsel->core.attr.type); + TEST_ASSERT_VAL("wrong config", 0x1a == evsel->core.attr.config); + return 0; + } + TEST_ASSERT_VAL("wrong number of entries", 2 == evlist->core.nr_entries); TEST_ASSERT_VAL("wrong type", PERF_TYPE_RAW == evsel->core.attr.type); TEST_ASSERT_VAL("wrong config", 0x1a == evsel->core.attr.config); @@ -1620,13 +1628,9 @@ static int test__hybrid_cache_event(struct evlist *evlist) { struct evsel *evsel = evlist__first(evlist); - TEST_ASSERT_VAL("wrong number of entries", 2 == evlist->core.nr_entries); + TEST_ASSERT_VAL("wrong number of entries", 1 == evlist->core.nr_entries); TEST_ASSERT_VAL("wrong type", PERF_TYPE_HW_CACHE == evsel->core.attr.type); TEST_ASSERT_VAL("wrong config", 0x2 == (evsel->core.attr.config & 0xffffffff)); - - evsel = evsel__next(evsel); - TEST_ASSERT_VAL("wrong type", PERF_TYPE_HW_CACHE == evsel->core.attr.type); - TEST_ASSERT_VAL("wrong config", 0x10002 == (evsel->core.attr.config & 0xffffffff)); return 0; } @@ -2028,7 +2032,7 @@ static struct evlist_test test__hybrid_events[] = { .id = 7, }, { - .name = "cpu_core/LLC-loads/,cpu_atom/LLC-load-misses/", + .name = "cpu_core/LLC-loads/", .check = test__hybrid_cache_event, .id = 8, }, From 212f3d97abc8be09549de12cedb290f47b4dce5a Mon Sep 17 00:00:00 2001 From: Jin Yao Date: Thu, 8 Jul 2021 09:37:00 +0800 Subject: [PATCH 329/714] perf tests: Fix 'Roundtrip evsel->name' on core-only system If the atom CPUs are offlined, the 'cpu_atom' is not valid. Perf will not create two events for one hw event, so the evsel->idx doesn't need to be divided by 2 before comparing. Signed-off-by: Jin Yao Acked-by: Jiri Olsa Cc: Alexander Shishkin Cc: Andi Kleen Cc: Jin Yao Cc: Kan Liang Cc: Peter Zijlstra Link: http://lore.kernel.org/lkml/20210708013701.20347-4-yao.jin@linux.intel.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/tests/evsel-roundtrip-name.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/tools/perf/tests/evsel-roundtrip-name.c b/tools/perf/tests/evsel-roundtrip-name.c index 5ebf5633190460..4e09f0a312afc3 100644 --- a/tools/perf/tests/evsel-roundtrip-name.c +++ b/tools/perf/tests/evsel-roundtrip-name.c @@ -5,6 +5,7 @@ #include "tests.h" #include "debug.h" #include "pmu.h" +#include "pmu-hybrid.h" #include #include @@ -102,7 +103,7 @@ int test__perf_evsel__roundtrip_name_test(struct test *test __maybe_unused, int { int err = 0, ret = 0; - if (perf_pmu__has_hybrid()) + if (perf_pmu__has_hybrid() && perf_pmu__hybrid_mounted("cpu_atom")) return perf_evsel__name_array_test(evsel__hw_names, 2); err = perf_evsel__name_array_test(evsel__hw_names, 1); From de3d5fd83c9b1099b0d207b41a222dc451184a63 Mon Sep 17 00:00:00 2001 From: Jin Yao Date: Thu, 8 Jul 2021 09:37:01 +0800 Subject: [PATCH 330/714] perf tests: Fix 'Convert perf time to TSC' on core-only system If the atom CPUs are offlined, the 'cpu_atom' is not valid. We don't need the test case for 'cpu_atom'. Signed-off-by: Jin Yao Acked-by: Jiri Olsa Cc: Alexander Shishkin Cc: Andi Kleen Cc: Jin Yao Cc: Kan Liang Cc: Peter Zijlstra Link: http://lore.kernel.org/lkml/20210708013701.20347-5-yao.jin@linux.intel.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/tests/perf-time-to-tsc.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/tools/perf/tests/perf-time-to-tsc.c b/tools/perf/tests/perf-time-to-tsc.c index 85d75b9b25a1aa..7c56bc1f4cff0d 100644 --- a/tools/perf/tests/perf-time-to-tsc.c +++ b/tools/perf/tests/perf-time-to-tsc.c @@ -21,6 +21,7 @@ #include "mmap.h" #include "tests.h" #include "pmu.h" +#include "pmu-hybrid.h" #define CHECK__(x) { \ while ((x) < 0) { \ @@ -93,7 +94,7 @@ int test__perf_time_to_tsc(struct test *test __maybe_unused, int subtest __maybe * For hybrid "cycles:u", it creates two events. * Init the second evsel here. */ - if (perf_pmu__has_hybrid()) { + if (perf_pmu__has_hybrid() && perf_pmu__hybrid_mounted("cpu_atom")) { evsel = evsel__next(evsel); evsel->core.attr.comm = 1; evsel->core.attr.disabled = 1; From e0a7ef2a62e4f61a751bccfc79b9e7acb51474de Mon Sep 17 00:00:00 2001 From: Jin Yao Date: Wed, 7 Jul 2021 13:56:52 +0800 Subject: [PATCH 331/714] perf stat: Merge uncore events by default for hybrid platform On a hybrid platform, by default 'perf stat' aggregates and reports the event counts per PMU. For example, # perf stat -e cycles -a true Performance counter stats for 'system wide': 1,400,445 cpu_core/cycles/ 680,881 cpu_atom/cycles/ 0.001770773 seconds time elapsed But for uncore events that's not a suitable method. Uncore has nothing to do with hybrid. So for uncore events, we aggregate event counts from all PMUs and report the counts without PMUs. Before: # perf stat -e arb/event=0x81,umask=0x1/,arb/event=0x84,umask=0x1/ -a true Performance counter stats for 'system wide': 2,058 uncore_arb_0/event=0x81,umask=0x1/ 2,028 uncore_arb_1/event=0x81,umask=0x1/ 0 uncore_arb_0/event=0x84,umask=0x1/ 0 uncore_arb_1/event=0x84,umask=0x1/ 0.000614498 seconds time elapsed After: # perf stat -e arb/event=0x81,umask=0x1/,arb/event=0x84,umask=0x1/ -a true Performance counter stats for 'system wide': 3,996 arb/event=0x81,umask=0x1/ 0 arb/event=0x84,umask=0x1/ 0.000630046 seconds time elapsed Of course, we also keep the '--no-merge' working for uncore events. # perf stat -e arb/event=0x81,umask=0x1/,arb/event=0x84,umask=0x1/ --no-merge true Performance counter stats for 'system wide': 1,952 uncore_arb_0/event=0x81,umask=0x1/ 1,921 uncore_arb_1/event=0x81,umask=0x1/ 0 uncore_arb_0/event=0x84,umask=0x1/ 0 uncore_arb_1/event=0x84,umask=0x1/ 0.000575536 seconds time elapsed Signed-off-by: Jin Yao Acked-by: Jiri Olsa Cc: Alexander Shishkin Cc: Andi Kleen Cc: Ingo Molnar Cc: Kan Liang Cc: Peter Zijlstra Link: https://lore.kernel.org/r/20210707055652.962-1-yao.jin@linux.intel.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/builtin-stat.c | 3 --- tools/perf/util/stat-display.c | 14 +++++++++++++- 2 files changed, 13 insertions(+), 4 deletions(-) diff --git a/tools/perf/builtin-stat.c b/tools/perf/builtin-stat.c index d25cb8088e8c54..634375937db963 100644 --- a/tools/perf/builtin-stat.c +++ b/tools/perf/builtin-stat.c @@ -2445,9 +2445,6 @@ int cmd_stat(int argc, const char **argv) evlist__check_cpu_maps(evsel_list); - if (perf_pmu__has_hybrid()) - stat_config.no_merge = true; - /* * Initialize thread_map with comm names, * so we could print it out on output. diff --git a/tools/perf/util/stat-display.c b/tools/perf/util/stat-display.c index 83a2bc02df15af..588601000f3f98 100644 --- a/tools/perf/util/stat-display.c +++ b/tools/perf/util/stat-display.c @@ -596,6 +596,18 @@ static void collect_all_aliases(struct perf_stat_config *config, struct evsel *c } } +static bool is_uncore(struct evsel *evsel) +{ + struct perf_pmu *pmu = evsel__find_pmu(evsel); + + return pmu && pmu->is_uncore; +} + +static bool hybrid_uniquify(struct evsel *evsel) +{ + return perf_pmu__has_hybrid() && !is_uncore(evsel); +} + static bool collect_data(struct perf_stat_config *config, struct evsel *counter, void (*cb)(struct perf_stat_config *config, struct evsel *counter, void *data, bool first), @@ -604,7 +616,7 @@ static bool collect_data(struct perf_stat_config *config, struct evsel *counter, if (counter->merged_stat) return false; cb(config, counter, data, true); - if (config->no_merge) + if (config->no_merge || hybrid_uniquify(counter)) uniquify_event_name(counter); else if (counter->auto_merge_stats) collect_all_aliases(config, counter, cb, data); From 376a947653f6214f397ef1c5aa2b7b7fc7b68c49 Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Sun, 9 May 2021 09:55:30 -0300 Subject: [PATCH 332/714] tools headers UAPI: Sync files changed by the memfd_secret new syscall To pick the changes in this cset: 7bb7f2ac24a028b2 ("arch, mm: wire up memfd_secret system call where relevant") That silences these perf build warnings and add support for those new syscalls in tools such as 'perf trace'. For instance, this is now possible: # perf trace -v -e memfd_secret event qualifier tracepoint filter: (common_pid != 13375 && common_pid != 3713) && (id == 447) ^C# That is the filter expression attached to the raw_syscalls:sys_{enter,exit} tracepoints. $ grep memfd_secret tools/perf/arch/x86/entry/syscalls/syscall_64.tbl 447 common memfd_secret sys_memfd_secret $ This addresses these perf build warnings: Warning: Kernel ABI header at 'tools/arch/arm64/include/uapi/asm/unistd.h' differs from latest version at 'arch/arm64/include/uapi/asm/unistd.h' diff -u tools/arch/arm64/include/uapi/asm/unistd.h arch/arm64/include/uapi/asm/unistd.h Warning: Kernel ABI header at 'tools/include/uapi/asm-generic/unistd.h' differs from latest version at 'include/uapi/asm-generic/unistd.h' diff -u tools/include/uapi/asm-generic/unistd.h include/uapi/asm-generic/unistd.h Warning: Kernel ABI header at 'tools/perf/arch/x86/entry/syscalls/syscall_64.tbl' differs from latest version at 'arch/x86/entry/syscalls/syscall_64.tbl' diff -u tools/perf/arch/x86/entry/syscalls/syscall_64.tbl arch/x86/entry/syscalls/syscall_64.tbl Cc: Linus Torvalds Cc: Mike Rapoport Signed-off-by: Arnaldo Carvalho de Melo --- tools/arch/arm64/include/uapi/asm/unistd.h | 1 + tools/include/uapi/asm-generic/unistd.h | 7 ++++++- tools/perf/arch/x86/entry/syscalls/syscall_64.tbl | 1 + 3 files changed, 8 insertions(+), 1 deletion(-) diff --git a/tools/arch/arm64/include/uapi/asm/unistd.h b/tools/arch/arm64/include/uapi/asm/unistd.h index f83a70e07df85c..ce2ee8f1e36109 100644 --- a/tools/arch/arm64/include/uapi/asm/unistd.h +++ b/tools/arch/arm64/include/uapi/asm/unistd.h @@ -20,5 +20,6 @@ #define __ARCH_WANT_SET_GET_RLIMIT #define __ARCH_WANT_TIME32_SYSCALLS #define __ARCH_WANT_SYS_CLONE3 +#define __ARCH_WANT_MEMFD_SECRET #include diff --git a/tools/include/uapi/asm-generic/unistd.h b/tools/include/uapi/asm-generic/unistd.h index f211961ce1da69..a9d6fcd95f42aa 100644 --- a/tools/include/uapi/asm-generic/unistd.h +++ b/tools/include/uapi/asm-generic/unistd.h @@ -873,8 +873,13 @@ __SYSCALL(__NR_landlock_add_rule, sys_landlock_add_rule) #define __NR_landlock_restrict_self 446 __SYSCALL(__NR_landlock_restrict_self, sys_landlock_restrict_self) +#ifdef __ARCH_WANT_MEMFD_SECRET +#define __NR_memfd_secret 447 +__SYSCALL(__NR_memfd_secret, sys_memfd_secret) +#endif + #undef __NR_syscalls -#define __NR_syscalls 447 +#define __NR_syscalls 448 /* * 32 bit systems traditionally used different diff --git a/tools/perf/arch/x86/entry/syscalls/syscall_64.tbl b/tools/perf/arch/x86/entry/syscalls/syscall_64.tbl index af973e400053de..f6b57799c1ea2d 100644 --- a/tools/perf/arch/x86/entry/syscalls/syscall_64.tbl +++ b/tools/perf/arch/x86/entry/syscalls/syscall_64.tbl @@ -368,6 +368,7 @@ 444 common landlock_create_ruleset sys_landlock_create_ruleset 445 common landlock_add_rule sys_landlock_add_rule 446 common landlock_restrict_self sys_landlock_restrict_self +447 common memfd_secret sys_memfd_secret # # Due to a historical design error, certain syscalls are numbered differently From 50e98924d72dc730fe1f1646977205adf608dccd Mon Sep 17 00:00:00 2001 From: Heiko Carstens Date: Tue, 13 Jul 2021 11:19:07 +0200 Subject: [PATCH 333/714] libperf: Fix build error with LIBPFM4=1 MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Fix build error with LIBPFM4=1: CC util/pfm.o util/pfm.c: In function ‘parse_libpfm_events_option’: util/pfm.c:102:30: error: ‘struct evsel’ has no member named ‘leader’ 102 | evsel->leader = grp_leader; | ^~ Committer notes: There is this entry in 'make -C tools/perf build-test' to test the build with libpfm: $ grep libpfm tools/perf/tests/make make_with_libpfm4 := LIBPFM4=1 run += make_with_libpfm4 $ But the test machine lacked libpfm-devel, now its installed and further cases like this shouldn't happen. Committer testing: Before this patch this fails, after applying it: $ make -C tools/perf build-test make: Entering directory '/var/home/acme/git/perf/tools/perf' - tarpkg: ./tests/perf-targz-src-pkg . make_static: make LDFLAGS=-static NO_PERF_READ_VDSO32=1 NO_PERF_READ_VDSOX32=1 NO_JVMTI=1 -j24 DESTDIR=/tmp/tmp.KzFSfvGRQa make_no_scripts_O: make NO_LIBPYTHON=1 NO_LIBPERL=1 make_with_libpfm4_O: make LIBPFM4=1 make_install_prefix_O: make install prefix=/tmp/krava make_no_auxtrace_O: make NO_AUXTRACE=1 $ rpm -q libpfm-devel libpfm-devel-4.11.0-4.fc34.x86_64 $ FIXME: This shows a need for 'build-test' to bail out when a build option is specified that has no required library devel files installed. Fixes: fba7c86601e2e42d ("libperf: Move 'leader' from tools/perf to perf_evsel::leader") Signed-off-by: Heiko Carstens Acked-by: Jiri Olsa Tested-by: Arnaldo Carvalho de Melo Cc: Peter Zijlstra Link: http://lore.kernel.org/lkml/20210713091907.1555560-1-hca@linux.ibm.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/pfm.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tools/perf/util/pfm.c b/tools/perf/util/pfm.c index dd9ed56e0504e7..756295dedcccce 100644 --- a/tools/perf/util/pfm.c +++ b/tools/perf/util/pfm.c @@ -99,7 +99,7 @@ int parse_libpfm_events_option(const struct option *opt, const char *str, grp_leader = evsel; if (grp_evt > -1) { - evsel->leader = grp_leader; + evsel__set_leader(evsel, grp_leader); grp_leader->core.nr_members++; grp_evt++; } From 506c1da44fee32ba1d3a70413289ad58c772bba6 Mon Sep 17 00:00:00 2001 From: Shyam Prasad N Date: Tue, 18 May 2021 15:05:50 +0000 Subject: [PATCH 334/714] cifs: use the expiry output of dns_query to schedule next resolution We recently fixed DNS resolution of the server hostname during reconnect. However, server IP address may change, even when the old one continues to server (although sub-optimally). We should schedule the next DNS resolution based on the TTL of the DNS record used for the last resolution. This way, we resolve the server hostname again when a DNS record expires. Signed-off-by: Shyam Prasad N Reviewed-by: Paulo Alcantara (SUSE) Cc: # v5.11+ Signed-off-by: Steve French --- fs/cifs/cifs_dfs_ref.c | 2 +- fs/cifs/cifsglob.h | 4 +++ fs/cifs/connect.c | 55 +++++++++++++++++++++++++++++++++++++++--- fs/cifs/dns_resolve.c | 10 +++++--- fs/cifs/dns_resolve.h | 2 +- fs/cifs/misc.c | 2 +- 6 files changed, 65 insertions(+), 10 deletions(-) diff --git a/fs/cifs/cifs_dfs_ref.c b/fs/cifs/cifs_dfs_ref.c index 57f91311fdaa3a..fa131da01a5f3d 100644 --- a/fs/cifs/cifs_dfs_ref.c +++ b/fs/cifs/cifs_dfs_ref.c @@ -176,7 +176,7 @@ char *cifs_compose_mount_options(const char *sb_mountdata, } } - rc = dns_resolve_server_name_to_ip(name, &srvIP); + rc = dns_resolve_server_name_to_ip(name, &srvIP, NULL); if (rc < 0) { cifs_dbg(FYI, "%s: Failed to resolve server part of %s to IP: %d\n", __func__, name, rc); diff --git a/fs/cifs/cifsglob.h b/fs/cifs/cifsglob.h index 3c2e117bb92686..ade7080f7eaa89 100644 --- a/fs/cifs/cifsglob.h +++ b/fs/cifs/cifsglob.h @@ -75,6 +75,9 @@ #define SMB_ECHO_INTERVAL_MAX 600 #define SMB_ECHO_INTERVAL_DEFAULT 60 +/* dns resolution interval in seconds */ +#define SMB_DNS_RESOLVE_INTERVAL_DEFAULT 600 + /* maximum number of PDUs in one compound */ #define MAX_COMPOUND 5 @@ -646,6 +649,7 @@ struct TCP_Server_Info { /* point to the SMBD connection if RDMA is used instead of socket */ struct smbd_connection *smbd_conn; struct delayed_work echo; /* echo ping workqueue job */ + struct delayed_work resolve; /* dns resolution workqueue job */ char *smallbuf; /* pointer to current "small" buffer */ char *bigbuf; /* pointer to current "big" buffer */ /* Total size of this PDU. Only valid from cifs_demultiplex_thread */ diff --git a/fs/cifs/connect.c b/fs/cifs/connect.c index 01dc45178f6612..f1a7e63ab58fff 100644 --- a/fs/cifs/connect.c +++ b/fs/cifs/connect.c @@ -78,6 +78,8 @@ static int reconn_set_ipaddr_from_hostname(struct TCP_Server_Info *server) int rc; int len; char *unc, *ipaddr = NULL; + time64_t expiry, now; + unsigned long ttl = SMB_DNS_RESOLVE_INTERVAL_DEFAULT; if (!server->hostname) return -EINVAL; @@ -91,13 +93,13 @@ static int reconn_set_ipaddr_from_hostname(struct TCP_Server_Info *server) } scnprintf(unc, len, "\\\\%s", server->hostname); - rc = dns_resolve_server_name_to_ip(unc, &ipaddr); + rc = dns_resolve_server_name_to_ip(unc, &ipaddr, &expiry); kfree(unc); if (rc < 0) { cifs_dbg(FYI, "%s: failed to resolve server part of %s to IP: %d\n", __func__, server->hostname, rc); - return rc; + goto requeue_resolve; } spin_lock(&cifs_tcp_ses_lock); @@ -106,7 +108,45 @@ static int reconn_set_ipaddr_from_hostname(struct TCP_Server_Info *server) spin_unlock(&cifs_tcp_ses_lock); kfree(ipaddr); - return !rc ? -1 : 0; + /* rc == 1 means success here */ + if (rc) { + now = ktime_get_real_seconds(); + if (expiry && expiry > now) + /* + * To make sure we don't use the cached entry, retry 1s + * after expiry. + */ + ttl = (expiry - now + 1); + } + rc = !rc ? -1 : 0; + +requeue_resolve: + cifs_dbg(FYI, "%s: next dns resolution scheduled for %lu seconds in the future\n", + __func__, ttl); + mod_delayed_work(cifsiod_wq, &server->resolve, (ttl * HZ)); + + return rc; +} + + +static void cifs_resolve_server(struct work_struct *work) +{ + int rc; + struct TCP_Server_Info *server = container_of(work, + struct TCP_Server_Info, resolve.work); + + mutex_lock(&server->srv_mutex); + + /* + * Resolve the hostname again to make sure that IP address is up-to-date. + */ + rc = reconn_set_ipaddr_from_hostname(server); + if (rc) { + cifs_dbg(FYI, "%s: failed to resolve hostname: %d\n", + __func__, rc); + } + + mutex_unlock(&server->srv_mutex); } #ifdef CONFIG_CIFS_DFS_UPCALL @@ -680,6 +720,7 @@ static void clean_demultiplex_info(struct TCP_Server_Info *server) spin_unlock(&cifs_tcp_ses_lock); cancel_delayed_work_sync(&server->echo); + cancel_delayed_work_sync(&server->resolve); spin_lock(&GlobalMid_Lock); server->tcpStatus = CifsExiting; @@ -1260,6 +1301,7 @@ cifs_put_tcp_session(struct TCP_Server_Info *server, int from_reconnect) spin_unlock(&cifs_tcp_ses_lock); cancel_delayed_work_sync(&server->echo); + cancel_delayed_work_sync(&server->resolve); if (from_reconnect) /* @@ -1342,6 +1384,7 @@ cifs_get_tcp_session(struct smb3_fs_context *ctx) INIT_LIST_HEAD(&tcp_ses->tcp_ses_list); INIT_LIST_HEAD(&tcp_ses->smb_ses_list); INIT_DELAYED_WORK(&tcp_ses->echo, cifs_echo_request); + INIT_DELAYED_WORK(&tcp_ses->resolve, cifs_resolve_server); INIT_DELAYED_WORK(&tcp_ses->reconnect, smb2_reconnect_server); mutex_init(&tcp_ses->reconnect_mutex); memcpy(&tcp_ses->srcaddr, &ctx->srcaddr, @@ -1427,6 +1470,12 @@ cifs_get_tcp_session(struct smb3_fs_context *ctx) /* queue echo request delayed work */ queue_delayed_work(cifsiod_wq, &tcp_ses->echo, tcp_ses->echo_interval); + /* queue dns resolution delayed work */ + cifs_dbg(FYI, "%s: next dns resolution scheduled for %d seconds in the future\n", + __func__, SMB_DNS_RESOLVE_INTERVAL_DEFAULT); + + queue_delayed_work(cifsiod_wq, &tcp_ses->resolve, (SMB_DNS_RESOLVE_INTERVAL_DEFAULT * HZ)); + return tcp_ses; out_err_crypto_release: diff --git a/fs/cifs/dns_resolve.c b/fs/cifs/dns_resolve.c index d15b82d569ef8c..8c616aaeb7c417 100644 --- a/fs/cifs/dns_resolve.c +++ b/fs/cifs/dns_resolve.c @@ -24,6 +24,7 @@ * dns_resolve_server_name_to_ip - Resolve UNC server name to ip address. * @unc: UNC path specifying the server (with '/' as delimiter) * @ip_addr: Where to return the IP address. + * @expiry: Where to return the expiry time for the dns record. * * The IP address will be returned in string form, and the caller is * responsible for freeing it. @@ -31,7 +32,7 @@ * Returns length of result on success, -ve on error. */ int -dns_resolve_server_name_to_ip(const char *unc, char **ip_addr) +dns_resolve_server_name_to_ip(const char *unc, char **ip_addr, time64_t *expiry) { struct sockaddr_storage ss; const char *hostname, *sep; @@ -66,13 +67,14 @@ dns_resolve_server_name_to_ip(const char *unc, char **ip_addr) /* Perform the upcall */ rc = dns_query(current->nsproxy->net_ns, NULL, hostname, len, - NULL, ip_addr, NULL, false); + NULL, ip_addr, expiry, false); if (rc < 0) cifs_dbg(FYI, "%s: unable to resolve: %*.*s\n", __func__, len, len, hostname); else - cifs_dbg(FYI, "%s: resolved: %*.*s to %s\n", - __func__, len, len, hostname, *ip_addr); + cifs_dbg(FYI, "%s: resolved: %*.*s to %s expiry %llu\n", + __func__, len, len, hostname, *ip_addr, + expiry ? (*expiry) : 0); return rc; name_is_IP_address: diff --git a/fs/cifs/dns_resolve.h b/fs/cifs/dns_resolve.h index 5be060b82b13c8..9fa2807ef79e04 100644 --- a/fs/cifs/dns_resolve.h +++ b/fs/cifs/dns_resolve.h @@ -12,7 +12,7 @@ #define _DNS_RESOLVE_H #ifdef __KERNEL__ -extern int dns_resolve_server_name_to_ip(const char *unc, char **ip_addr); +extern int dns_resolve_server_name_to_ip(const char *unc, char **ip_addr, time64_t *expiry); #endif /* KERNEL */ #endif /* _DNS_RESOLVE_H */ diff --git a/fs/cifs/misc.c b/fs/cifs/misc.c index 184138b4eb8cda..844abeb2b48fe7 100644 --- a/fs/cifs/misc.c +++ b/fs/cifs/misc.c @@ -1187,7 +1187,7 @@ int match_target_ip(struct TCP_Server_Info *server, cifs_dbg(FYI, "%s: target name: %s\n", __func__, target + 2); - rc = dns_resolve_server_name_to_ip(target, &tip); + rc = dns_resolve_server_name_to_ip(target, &tip, NULL); if (rc < 0) goto out; From 50630b3f1ada0bf412d3f28e73bac310448d9d6f Mon Sep 17 00:00:00 2001 From: Ronnie Sahlberg Date: Tue, 13 Jul 2021 12:22:59 +1000 Subject: [PATCH 335/714] cifs: Do not use the original cruid when following DFS links for multiuser mounts Bugzilla: https://bugzilla.kernel.org/show_bug.cgi?id=213565 cruid should only be used for the initial mount and after this we should use the current users credentials. Ignore the original cruid mount argument when creating a new context for a multiuser mount following a DFS link. Fixes: 24e0a1eff9e2 ("cifs: switch to new mount api") Cc: stable@vger.kernel.org # 5.11+ Reported-by: Xiaoli Feng Signed-off-by: Ronnie Sahlberg Reviewed-by: Paulo Alcantara (SUSE) Signed-off-by: Steve French --- fs/cifs/cifs_dfs_ref.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/fs/cifs/cifs_dfs_ref.c b/fs/cifs/cifs_dfs_ref.c index fa131da01a5f3d..007427ba75e5fb 100644 --- a/fs/cifs/cifs_dfs_ref.c +++ b/fs/cifs/cifs_dfs_ref.c @@ -211,6 +211,10 @@ char *cifs_compose_mount_options(const char *sb_mountdata, else noff = tkn_e - (sb_mountdata + off) + 1; + if (strncasecmp(sb_mountdata + off, "cruid=", 6) == 0) { + off += noff; + continue; + } if (strncasecmp(sb_mountdata + off, "unc=", 4) == 0) { off += noff; continue; From c9c9c6815f9004ee1ec87401ed0796853bd70f1b Mon Sep 17 00:00:00 2001 From: Hyunchul Lee Date: Mon, 12 Jul 2021 19:34:02 +0900 Subject: [PATCH 336/714] cifs: fix the out of range assignment to bit fields in parse_server_interfaces Because the out of range assignment to bit fields are compiler-dependant, the fields could have wrong value. Signed-off-by: Hyunchul Lee Signed-off-by: Steve French --- fs/cifs/smb2ops.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/fs/cifs/smb2ops.c b/fs/cifs/smb2ops.c index e4c8f603dd5889..232d528df230d3 100644 --- a/fs/cifs/smb2ops.c +++ b/fs/cifs/smb2ops.c @@ -557,8 +557,8 @@ parse_server_interfaces(struct network_interface_info_ioctl_rsp *buf, p = buf; while (bytes_left >= sizeof(*p)) { info->speed = le64_to_cpu(p->LinkSpeed); - info->rdma_capable = le32_to_cpu(p->Capability & RDMA_CAPABLE); - info->rss_capable = le32_to_cpu(p->Capability & RSS_CAPABLE); + info->rdma_capable = le32_to_cpu(p->Capability & RDMA_CAPABLE) ? 1 : 0; + info->rss_capable = le32_to_cpu(p->Capability & RSS_CAPABLE) ? 1 : 0; cifs_dbg(FYI, "%s: adding iface %zu\n", __func__, *iface_count); cifs_dbg(FYI, "%s: speed %zu bps\n", __func__, info->speed); From 9e5c772954406829e928dbe59891d08938ead04b Mon Sep 17 00:00:00 2001 From: Zheyu Ma Date: Wed, 14 Jul 2021 14:54:19 +0000 Subject: [PATCH 337/714] drm/ttm: add a check against null pointer dereference MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit When calling ttm_range_man_fini(), 'man' may be uninitialized, which may cause a null pointer dereference bug. Fix this by checking if it is a null pointer. This log reveals it: [ 7.902580 ] BUG: kernel NULL pointer dereference, address: 0000000000000058 [ 7.905721 ] RIP: 0010:ttm_range_man_fini+0x40/0x160 [ 7.911826 ] Call Trace: [ 7.911826 ] radeon_ttm_fini+0x167/0x210 [ 7.911826 ] radeon_bo_fini+0x15/0x40 [ 7.913767 ] rs400_fini+0x55/0x80 [ 7.914358 ] radeon_device_fini+0x3c/0x140 [ 7.914358 ] radeon_driver_unload_kms+0x5c/0xe0 [ 7.914358 ] radeon_driver_load_kms+0x13a/0x200 [ 7.914358 ] ? radeon_driver_unload_kms+0xe0/0xe0 [ 7.914358 ] drm_dev_register+0x1db/0x290 [ 7.914358 ] radeon_pci_probe+0x16a/0x230 [ 7.914358 ] local_pci_probe+0x4a/0xb0 Signed-off-by: Zheyu Ma Reviewed-by: Christian König Link: https://patchwork.freedesktop.org/patch/msgid/1626274459-8148-1-git-send-email-zheyuma97@gmail.com Signed-off-by: Christian König --- drivers/gpu/drm/ttm/ttm_range_manager.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/gpu/drm/ttm/ttm_range_manager.c b/drivers/gpu/drm/ttm/ttm_range_manager.c index 03395386e8a7cb..f4b08a8705b320 100644 --- a/drivers/gpu/drm/ttm/ttm_range_manager.c +++ b/drivers/gpu/drm/ttm/ttm_range_manager.c @@ -181,6 +181,9 @@ int ttm_range_man_fini(struct ttm_device *bdev, struct drm_mm *mm = &rman->mm; int ret; + if (!man) + return 0; + ttm_resource_manager_set_used(man, false); ret = ttm_resource_manager_evict_all(bdev, man); From 14158aa4510439c611759d57b74ac01ebcca0081 Mon Sep 17 00:00:00 2001 From: "Gustavo A. R. Silva" Date: Wed, 14 Jul 2021 11:02:37 -0500 Subject: [PATCH 338/714] usb: gadget: fsl_qe_udc: Fix fall-through warning for Clang Fix the following fallthrough warning (powerpc-randconfig): drivers/usb/gadget/udc/fsl_qe_udc.c:589:4: warning: unannotated fall-through between switch labels [-Wimplicit-fallthrough] Reported-by: kernel test robot Link: https://lore.kernel.org/lkml/60ef0750.I8J+C6KAtb0xVOAa%25lkp@intel.com/ Signed-off-by: Gustavo A. R. Silva --- drivers/usb/gadget/udc/fsl_qe_udc.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/usb/gadget/udc/fsl_qe_udc.c b/drivers/usb/gadget/udc/fsl_qe_udc.c index 8e85889336289d..15db7a3868fe4d 100644 --- a/drivers/usb/gadget/udc/fsl_qe_udc.c +++ b/drivers/usb/gadget/udc/fsl_qe_udc.c @@ -586,6 +586,7 @@ static int qe_ep_init(struct qe_udc *udc, case USB_SPEED_FULL: if (max <= 1023) break; + fallthrough; default: goto en_done; } From afbd0d299289a0faaa605af74982f87ad75468fc Mon Sep 17 00:00:00 2001 From: "Gustavo A. R. Silva" Date: Wed, 14 Jul 2021 11:05:55 -0500 Subject: [PATCH 339/714] dmaengine: mpc512x: Fix fall-through warning for Clang Fix the following fallthrough warning (powerpc-randconfig): drivers/dma/mpc512x_dma.c:816:2: warning: unannotated fall-through between switch labels [-Wimplicit-fallthrough] Reported-by: kernel test robot Link: https://lore.kernel.org/lkml/60ef0750.I8J+C6KAtb0xVOAa%25lkp@intel.com/ Signed-off-by: Gustavo A. R. Silva --- drivers/dma/mpc512x_dma.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/dma/mpc512x_dma.c b/drivers/dma/mpc512x_dma.c index c1a69149c8bf85..4a51fdbf5aa9c3 100644 --- a/drivers/dma/mpc512x_dma.c +++ b/drivers/dma/mpc512x_dma.c @@ -813,6 +813,7 @@ inline bool is_buswidth_valid(u8 buswidth, bool is_mpc8308) case 16: if (is_mpc8308) return false; + break; case 1: case 2: case 4: From d08c84e01afa7a7eee6badab25d5420fa847f783 Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Wed, 14 Jul 2021 13:06:38 -0300 Subject: [PATCH 340/714] perf sched: Cast PTHREAD_STACK_MIN to int as it may turn into sysconf(__SC_THREAD_STACK_MIN_VALUE) In fedora rawhide the PTHREAD_STACK_MIN define may end up expanded to a sysconf() call, and that will return 'long int', breaking the build: 45 fedora:rawhide : FAIL gcc version 11.1.1 20210623 (Red Hat 11.1.1-6) (GCC) builtin-sched.c: In function 'create_tasks': /git/perf-5.14.0-rc1/tools/include/linux/kernel.h:43:24: error: comparison of distinct pointer types lacks a cast [-Werror] 43 | (void) (&_max1 == &_max2); \ | ^~ builtin-sched.c:673:34: note: in expansion of macro 'max' 673 | (size_t) max(16 * 1024, PTHREAD_STACK_MIN)); | ^~~ cc1: all warnings being treated as errors $ grep __sysconf /usr/include/*/*.h /usr/include/bits/pthread_stack_min-dynamic.h:extern long int __sysconf (int __name) __THROW; /usr/include/bits/pthread_stack_min-dynamic.h:# define PTHREAD_STACK_MIN __sysconf (__SC_THREAD_STACK_MIN_VALUE) /usr/include/bits/time.h:extern long int __sysconf (int); /usr/include/bits/time.h:# define CLK_TCK ((__clock_t) __sysconf (2)) /* 2 is _SC_CLK_TCK */ $ So cast it to int to cope with that. Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/builtin-sched.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tools/perf/builtin-sched.c b/tools/perf/builtin-sched.c index 954ce2f594e967..aaf1cde476c951 100644 --- a/tools/perf/builtin-sched.c +++ b/tools/perf/builtin-sched.c @@ -670,7 +670,7 @@ static void create_tasks(struct perf_sched *sched) err = pthread_attr_init(&attr); BUG_ON(err); err = pthread_attr_setstacksize(&attr, - (size_t) max(16 * 1024, PTHREAD_STACK_MIN)); + (size_t) max(16 * 1024, (int)PTHREAD_STACK_MIN)); BUG_ON(err); err = pthread_mutex_lock(&sched->start_work_mutex); BUG_ON(err); From 64752a95b702817602d72f109ceaf5ec0780e283 Mon Sep 17 00:00:00 2001 From: Takashi Iwai Date: Wed, 14 Jul 2021 10:48:36 +0200 Subject: [PATCH 341/714] ALSA: usb-audio: Add missing proc text entry for BESPOKEN type Recently we've added a new usb_mixer element type, USB_MIXER_BESPOKEN, but it wasn't added in the table in snd_usb_mixer_dump_cval(). This is no big problem since each bespoken type should have its own dump method, but it still isn't disallowed to use the standard one, so we should cover it as well. Along with it, define the table with the explicit array initializer for avoiding other pitfalls. Fixes: 785b6f29a795 ("ALSA: usb-audio: scarlett2: Fix wrong resume call") Reported-by: Pavel Machek Cc: Link: https://lore.kernel.org/r/20210714084836.1977-1-tiwai@suse.de Signed-off-by: Takashi Iwai --- sound/usb/mixer.c | 10 +++++++++- 1 file changed, 9 insertions(+), 1 deletion(-) diff --git a/sound/usb/mixer.c b/sound/usb/mixer.c index 30b3e128e28d86..f4cdaf1ba44ace 100644 --- a/sound/usb/mixer.c +++ b/sound/usb/mixer.c @@ -3295,7 +3295,15 @@ static void snd_usb_mixer_dump_cval(struct snd_info_buffer *buffer, { struct usb_mixer_elem_info *cval = mixer_elem_list_to_info(list); static const char * const val_types[] = { - "BOOLEAN", "INV_BOOLEAN", "S8", "U8", "S16", "U16", "S32", "U32", + [USB_MIXER_BOOLEAN] = "BOOLEAN", + [USB_MIXER_INV_BOOLEAN] = "INV_BOOLEAN", + [USB_MIXER_S8] = "S8", + [USB_MIXER_U8] = "U8", + [USB_MIXER_S16] = "S16", + [USB_MIXER_U16] = "U16", + [USB_MIXER_S32] = "S32", + [USB_MIXER_U32] = "U32", + [USB_MIXER_BESPOKEN] = "BESPOKEN", }; snd_iprintf(buffer, " Info: id=%i, control=%i, cmask=0x%x, " "channels=%i, type=\"%s\"\n", cval->head.id, From 104aba8dd7dca85d82e94aba07e71994ccdaf4cf Mon Sep 17 00:00:00 2001 From: "Gustavo A. R. Silva" Date: Wed, 14 Jul 2021 11:10:40 -0500 Subject: [PATCH 342/714] powerpc/smp: Fix fall-through warning for Clang Fix the following fallthrough warning: arch/powerpc/platforms/powermac/smp.c:149:3: warning: unannotated fall-through between switch labels [-Wimplicit-fallthrough] Reported-by: kernel test robot Link: https://lore.kernel.org/lkml/60ef0750.I8J+C6KAtb0xVOAa%25lkp@intel.com/ Signed-off-by: Gustavo A. R. Silva --- arch/powerpc/platforms/powermac/smp.c | 1 + 1 file changed, 1 insertion(+) diff --git a/arch/powerpc/platforms/powermac/smp.c b/arch/powerpc/platforms/powermac/smp.c index bdfea6d6ab69a2..3256a316e884d5 100644 --- a/arch/powerpc/platforms/powermac/smp.c +++ b/arch/powerpc/platforms/powermac/smp.c @@ -146,6 +146,7 @@ static inline void psurge_clr_ipi(int cpu) switch(psurge_type) { case PSURGE_DUAL: out_8(psurge_sec_intr, ~0); + break; case PSURGE_NONE: break; default: From b7eb335e26a9c7f258c96b3962c283c379d3ede0 Mon Sep 17 00:00:00 2001 From: "Gustavo A. R. Silva" Date: Mon, 12 Jul 2021 00:57:54 -0500 Subject: [PATCH 343/714] Makefile: Enable -Wimplicit-fallthrough for Clang With the recent fixes for fallthrough warnings, it is now possible to enable -Wimplicit-fallthrough for Clang. It's important to mention that since we have adopted the use of the pseudo-keyword macro fallthrough; we also want to avoid having more /* fall through */ comments being introduced. Notice that contrary to GCC, Clang doesn't recognize any comments as implicit fall-through markings when the -Wimplicit-fallthrough option is enabled. So, in order to avoid having more comments being introduced, we have to use the option -Wimplicit-fallthrough=5 for GCC, which similar to Clang, will cause a warning in case a code comment is intended to be used as a fall-through marking. Co-developed-by: Kees Cook Signed-off-by: Kees Cook Signed-off-by: Gustavo A. R. Silva --- Makefile | 9 +++------ 1 file changed, 3 insertions(+), 6 deletions(-) diff --git a/Makefile b/Makefile index c3f9bd191b8941..eaa692976851af 100644 --- a/Makefile +++ b/Makefile @@ -797,12 +797,6 @@ KBUILD_CFLAGS += -Wno-gnu # source of a reference will be _MergedGlobals and not on of the whitelisted names. # See modpost pattern 2 KBUILD_CFLAGS += -mno-global-merge -else - -# Warn about unmarked fall-throughs in switch statement. -# Disabled for clang while comment to attribute conversion happens and -# https://github.com/ClangBuiltLinux/linux/issues/636 is discussed. -KBUILD_CFLAGS += $(call cc-option,-Wimplicit-fallthrough,) endif # These warnings generated too much noise in a regular build. @@ -983,6 +977,9 @@ NOSTDINC_FLAGS += -nostdinc -isystem $(shell $(CC) -print-file-name=include) # warn about C99 declaration after statement KBUILD_CFLAGS += -Wdeclaration-after-statement +# Warn about unmarked fall-throughs in switch statement. +KBUILD_CFLAGS += $(call cc-option,-Wimplicit-fallthrough=5,$(call cc-option,-Wimplicit-fallthrough,)) + # Variable Length Arrays (VLAs) should not be used anywhere in the kernel KBUILD_CFLAGS += -Wvla From f8f0edabcc09fafd695ed2adc0eb825104e35d5c Mon Sep 17 00:00:00 2001 From: Marc Zyngier Date: Thu, 1 Jul 2021 08:19:28 +0100 Subject: [PATCH 344/714] KVM: selftests: x86: Address missing vm_install_exception_handler conversions Commit b78f4a59669 ("KVM: selftests: Rename vm_handle_exception") raced with a couple of new x86 tests, missing two vm_handle_exception to vm_install_exception_handler conversions. Help the two broken tests to catch up with the new world. Cc: Andrew Jones CC: Ricardo Koller Cc: Paolo Bonzini Signed-off-by: Marc Zyngier Message-Id: <20210701071928.2971053-1-maz@kernel.org> Reviewed-by: Andrew Jones Reviewed-by: Ricardo Koller Signed-off-by: Paolo Bonzini --- tools/testing/selftests/kvm/x86_64/hyperv_features.c | 2 +- tools/testing/selftests/kvm/x86_64/mmu_role_test.c | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/tools/testing/selftests/kvm/x86_64/hyperv_features.c b/tools/testing/selftests/kvm/x86_64/hyperv_features.c index 42bd658f52a827..af27c7e829c195 100644 --- a/tools/testing/selftests/kvm/x86_64/hyperv_features.c +++ b/tools/testing/selftests/kvm/x86_64/hyperv_features.c @@ -615,7 +615,7 @@ int main(void) vm_init_descriptor_tables(vm); vcpu_init_descriptor_tables(vm, VCPU_ID); - vm_handle_exception(vm, GP_VECTOR, guest_gp_handler); + vm_install_exception_handler(vm, GP_VECTOR, guest_gp_handler); pr_info("Testing access to Hyper-V specific MSRs\n"); guest_test_msrs_access(vm, addr_gva2hva(vm, msr_gva), diff --git a/tools/testing/selftests/kvm/x86_64/mmu_role_test.c b/tools/testing/selftests/kvm/x86_64/mmu_role_test.c index 523371cf8e8f7b..da2325fcad87bf 100644 --- a/tools/testing/selftests/kvm/x86_64/mmu_role_test.c +++ b/tools/testing/selftests/kvm/x86_64/mmu_role_test.c @@ -71,7 +71,7 @@ static void mmu_role_test(u32 *cpuid_reg, u32 evil_cpuid_val) /* Set up a #PF handler to eat the RSVD #PF and signal all done! */ vm_init_descriptor_tables(vm); vcpu_init_descriptor_tables(vm, VCPU_ID); - vm_handle_exception(vm, PF_VECTOR, guest_pf_handler); + vm_install_exception_handler(vm, PF_VECTOR, guest_pf_handler); r = _vcpu_run(vm, VCPU_ID); TEST_ASSERT(r == 0, "vcpu_run failed: %d\n", r); From f0414b078dd11641a7a64027c2741396f47718fd Mon Sep 17 00:00:00 2001 From: Sean Christopherson Date: Thu, 24 Jun 2021 17:18:53 -0700 Subject: [PATCH 345/714] Revert "KVM: x86: WARN and reject loading KVM if NX is supported but not enabled" Let KVM load if EFER.NX=0 even if NX is supported, the analysis and testing (or lack thereof) for the non-PAE host case was garbage. If the kernel won't be using PAE paging, .Ldefault_entry in head_32.S skips over the entire EFER sequence. Hopefully that can be changed in the future to allow KVM to require EFER.NX, but the motivation behind KVM's requirement isn't yet merged. Reverting and revisiting the mess at a later date is by far the safest approach. This reverts commit 8bbed95d2cb6e5de8a342d761a89b0a04faed7be. Fixes: 8bbed95d2cb6 ("KVM: x86: WARN and reject loading KVM if NX is supported but not enabled") Signed-off-by: Sean Christopherson Message-Id: <20210625001853.318148-1-seanjc@google.com> Signed-off-by: Paolo Bonzini --- arch/x86/kvm/x86.c | 3 --- 1 file changed, 3 deletions(-) diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index 17468d983fbd57..cf10d196ca65ec 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -10981,9 +10981,6 @@ int kvm_arch_hardware_setup(void *opaque) int r; rdmsrl_safe(MSR_EFER, &host_efer); - if (WARN_ON_ONCE(boot_cpu_has(X86_FEATURE_NX) && - !(host_efer & EFER_NX))) - return -EIO; if (boot_cpu_has(X86_FEATURE_XSAVES)) rdmsrl(MSR_IA32_XSS, host_xss); From 4bf48e3c0aafd32b960d341c4925b48f416f14a5 Mon Sep 17 00:00:00 2001 From: Sean Christopherson Date: Wed, 23 Jun 2021 16:05:46 -0700 Subject: [PATCH 346/714] KVM: x86: Use guest MAXPHYADDR from CPUID.0x8000_0008 iff TDP is enabled Ignore the guest MAXPHYADDR reported by CPUID.0x8000_0008 if TDP, i.e. NPT, is disabled, and instead use the host's MAXPHYADDR. Per AMD'S APM: Maximum guest physical address size in bits. This number applies only to guests using nested paging. When this field is zero, refer to the PhysAddrSize field for the maximum guest physical address size. Fixes: 24c82e576b78 ("KVM: Sanitize cpuid") Cc: stable@vger.kernel.org Signed-off-by: Sean Christopherson Message-Id: <20210623230552.4027702-2-seanjc@google.com> Signed-off-by: Paolo Bonzini --- arch/x86/kvm/cpuid.c | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/arch/x86/kvm/cpuid.c b/arch/x86/kvm/cpuid.c index c42613cfb5ba6d..1a4217b3e18551 100644 --- a/arch/x86/kvm/cpuid.c +++ b/arch/x86/kvm/cpuid.c @@ -940,8 +940,14 @@ static inline int __do_cpuid_func(struct kvm_cpuid_array *array, u32 function) unsigned virt_as = max((entry->eax >> 8) & 0xff, 48U); unsigned phys_as = entry->eax & 0xff; - if (!g_phys_as) + /* + * Use bare metal's MAXPHADDR if the CPU doesn't report guest + * MAXPHYADDR separately, or if TDP (NPT) is disabled, as the + * guest version "applies only to guests using nested paging". + */ + if (!g_phys_as || !tdp_enabled) g_phys_as = phys_as; + entry->eax = g_phys_as | (virt_as << 8); entry->edx = 0; cpuid_entry_override(entry, CPUID_8000_0008_EBX); From e39f00f60ebd2e7b295c37a05e6349df656d3eb8 Mon Sep 17 00:00:00 2001 From: Sean Christopherson Date: Wed, 23 Jun 2021 16:05:47 -0700 Subject: [PATCH 347/714] KVM: x86: Use kernel's x86_phys_bits to handle reduced MAXPHYADDR Use boot_cpu_data.x86_phys_bits instead of the raw CPUID information to enumerate the MAXPHYADDR for KVM guests when TDP is disabled (the guest version is only relevant to NPT/TDP). When using shadow paging, any reductions to the host's MAXPHYADDR apply to KVM and its guests as well, i.e. using the raw CPUID info will cause KVM to misreport the number of PA bits available to the guest. Unconditionally zero out the "Physical Address bit reduction" entry. For !TDP, the adjustment is already done, and for TDP enumerating the host's reduction is wrong as the reduction does not apply to GPAs. Fixes: 9af9b94068fb ("x86/cpu/AMD: Handle SME reduction in physical address size") Cc: stable@vger.kernel.org Signed-off-by: Sean Christopherson Message-Id: <20210623230552.4027702-3-seanjc@google.com> Signed-off-by: Paolo Bonzini --- arch/x86/kvm/cpuid.c | 27 ++++++++++++++++++++------- 1 file changed, 20 insertions(+), 7 deletions(-) diff --git a/arch/x86/kvm/cpuid.c b/arch/x86/kvm/cpuid.c index 1a4217b3e18551..ca7866d63e982d 100644 --- a/arch/x86/kvm/cpuid.c +++ b/arch/x86/kvm/cpuid.c @@ -941,11 +941,18 @@ static inline int __do_cpuid_func(struct kvm_cpuid_array *array, u32 function) unsigned phys_as = entry->eax & 0xff; /* - * Use bare metal's MAXPHADDR if the CPU doesn't report guest - * MAXPHYADDR separately, or if TDP (NPT) is disabled, as the - * guest version "applies only to guests using nested paging". + * If TDP (NPT) is disabled use the adjusted host MAXPHYADDR as + * the guest operates in the same PA space as the host, i.e. + * reductions in MAXPHYADDR for memory encryption affect shadow + * paging, too. + * + * If TDP is enabled but an explicit guest MAXPHYADDR is not + * provided, use the raw bare metal MAXPHYADDR as reductions to + * the HPAs do not affect GPAs. */ - if (!g_phys_as || !tdp_enabled) + if (!tdp_enabled) + g_phys_as = boot_cpu_data.x86_phys_bits; + else if (!g_phys_as) g_phys_as = phys_as; entry->eax = g_phys_as | (virt_as << 8); @@ -970,12 +977,18 @@ static inline int __do_cpuid_func(struct kvm_cpuid_array *array, u32 function) case 0x8000001a: case 0x8000001e: break; - /* Support memory encryption cpuid if host supports it */ case 0x8000001F: - if (!kvm_cpu_cap_has(X86_FEATURE_SEV)) + if (!kvm_cpu_cap_has(X86_FEATURE_SEV)) { entry->eax = entry->ebx = entry->ecx = entry->edx = 0; - else + } else { cpuid_entry_override(entry, CPUID_8000_001F_EAX); + + /* + * Enumerate '0' for "PA bits reduction", the adjusted + * MAXPHYADDR is enumerated directly (see 0x80000008). + */ + entry->ebx &= ~GENMASK(11, 6); + } break; /*Add support for Centaur's CPUID instruction*/ case 0xC0000000: From fc9bf2e087efcd81bda2e52d09616d2a1bf982a8 Mon Sep 17 00:00:00 2001 From: Sean Christopherson Date: Wed, 23 Jun 2021 16:05:49 -0700 Subject: [PATCH 348/714] KVM: x86/mmu: Do not apply HPA (memory encryption) mask to GPAs Ignore "dynamic" host adjustments to the physical address mask when generating the masks for guest PTEs, i.e. the guest PA masks. The host physical address space and guest physical address space are two different beasts, e.g. even though SEV's C-bit is the same bit location for both host and guest, disabling SME in the host (which clears shadow_me_mask) does not affect the guest PTE->GPA "translation". For non-SEV guests, not dropping bits is the correct behavior. Assuming KVM and userspace correctly enumerate/configure guest MAXPHYADDR, bits that are lost as collateral damage from memory encryption are treated as reserved bits, i.e. KVM will never get to the point where it attempts to generate a gfn using the affected bits. And if userspace wants to create a bogus vCPU, then userspace gets to deal with the fallout of hardware doing odd things with bad GPAs. For SEV guests, not dropping the C-bit is technically wrong, but it's a moot point because KVM can't read SEV guest's page tables in any case since they're always encrypted. Not to mention that the current KVM code is also broken since sme_me_mask does not have to be non-zero for SEV to be supported by KVM. The proper fix would be to teach all of KVM to correctly handle guest private memory, but that's a task for the future. Fixes: d0ec49d4de90 ("kvm/x86/svm: Support Secure Memory Encryption within KVM") Cc: stable@vger.kernel.org Cc: Brijesh Singh Cc: Tom Lendacky Signed-off-by: Sean Christopherson Message-Id: <20210623230552.4027702-5-seanjc@google.com> [Use a new header instead of adding header guards to paging_tmpl.h. - Paolo] Signed-off-by: Paolo Bonzini --- arch/x86/kvm/mmu/mmu.c | 2 ++ arch/x86/kvm/mmu/paging.h | 14 ++++++++++++++ arch/x86/kvm/mmu/paging_tmpl.h | 4 ++-- arch/x86/kvm/mmu/spte.h | 6 ------ 4 files changed, 18 insertions(+), 8 deletions(-) create mode 100644 arch/x86/kvm/mmu/paging.h diff --git a/arch/x86/kvm/mmu/mmu.c b/arch/x86/kvm/mmu/mmu.c index 00732757cc609c..b888385d19336a 100644 --- a/arch/x86/kvm/mmu/mmu.c +++ b/arch/x86/kvm/mmu/mmu.c @@ -53,6 +53,8 @@ #include #include "trace.h" +#include "paging.h" + extern bool itlb_multihit_kvm_mitigation; int __read_mostly nx_huge_pages = -1; diff --git a/arch/x86/kvm/mmu/paging.h b/arch/x86/kvm/mmu/paging.h new file mode 100644 index 00000000000000..de8ab323bb7076 --- /dev/null +++ b/arch/x86/kvm/mmu/paging.h @@ -0,0 +1,14 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ +/* Shadow paging constants/helpers that don't need to be #undef'd. */ +#ifndef __KVM_X86_PAGING_H +#define __KVM_X86_PAGING_H + +#define GUEST_PT64_BASE_ADDR_MASK (((1ULL << 52) - 1) & ~(u64)(PAGE_SIZE-1)) +#define PT64_LVL_ADDR_MASK(level) \ + (GUEST_PT64_BASE_ADDR_MASK & ~((1ULL << (PAGE_SHIFT + (((level) - 1) \ + * PT64_LEVEL_BITS))) - 1)) +#define PT64_LVL_OFFSET_MASK(level) \ + (GUEST_PT64_BASE_ADDR_MASK & ((1ULL << (PAGE_SHIFT + (((level) - 1) \ + * PT64_LEVEL_BITS))) - 1)) +#endif /* __KVM_X86_PAGING_H */ + diff --git a/arch/x86/kvm/mmu/paging_tmpl.h b/arch/x86/kvm/mmu/paging_tmpl.h index 490a028ddabe98..ee044d357b5f98 100644 --- a/arch/x86/kvm/mmu/paging_tmpl.h +++ b/arch/x86/kvm/mmu/paging_tmpl.h @@ -24,7 +24,7 @@ #define pt_element_t u64 #define guest_walker guest_walker64 #define FNAME(name) paging##64_##name - #define PT_BASE_ADDR_MASK PT64_BASE_ADDR_MASK + #define PT_BASE_ADDR_MASK GUEST_PT64_BASE_ADDR_MASK #define PT_LVL_ADDR_MASK(lvl) PT64_LVL_ADDR_MASK(lvl) #define PT_LVL_OFFSET_MASK(lvl) PT64_LVL_OFFSET_MASK(lvl) #define PT_INDEX(addr, level) PT64_INDEX(addr, level) @@ -57,7 +57,7 @@ #define pt_element_t u64 #define guest_walker guest_walkerEPT #define FNAME(name) ept_##name - #define PT_BASE_ADDR_MASK PT64_BASE_ADDR_MASK + #define PT_BASE_ADDR_MASK GUEST_PT64_BASE_ADDR_MASK #define PT_LVL_ADDR_MASK(lvl) PT64_LVL_ADDR_MASK(lvl) #define PT_LVL_OFFSET_MASK(lvl) PT64_LVL_OFFSET_MASK(lvl) #define PT_INDEX(addr, level) PT64_INDEX(addr, level) diff --git a/arch/x86/kvm/mmu/spte.h b/arch/x86/kvm/mmu/spte.h index 7a5ce931410755..eb7b227fc6cfe1 100644 --- a/arch/x86/kvm/mmu/spte.h +++ b/arch/x86/kvm/mmu/spte.h @@ -38,12 +38,6 @@ static_assert(SPTE_TDP_AD_ENABLED_MASK == 0); #else #define PT64_BASE_ADDR_MASK (((1ULL << 52) - 1) & ~(u64)(PAGE_SIZE-1)) #endif -#define PT64_LVL_ADDR_MASK(level) \ - (PT64_BASE_ADDR_MASK & ~((1ULL << (PAGE_SHIFT + (((level) - 1) \ - * PT64_LEVEL_BITS))) - 1)) -#define PT64_LVL_OFFSET_MASK(level) \ - (PT64_BASE_ADDR_MASK & ((1ULL << (PAGE_SHIFT + (((level) - 1) \ - * PT64_LEVEL_BITS))) - 1)) #define PT64_PERM_MASK (PT_PRESENT_MASK | PT_WRITABLE_MASK | shadow_user_mask \ | shadow_x_mask | shadow_nx_mask | shadow_me_mask) From 76ff371b67cb12fb635396234468abcf6a466f16 Mon Sep 17 00:00:00 2001 From: Sean Christopherson Date: Thu, 24 Jun 2021 19:03:54 -0700 Subject: [PATCH 349/714] KVM: SVM: Revert clearing of C-bit on GPA in #NPF handler Don't clear the C-bit in the #NPF handler, as it is a legal GPA bit for non-SEV guests, and for SEV guests the C-bit is dropped before the GPA hits the NPT in hardware. Clearing the bit for non-SEV guests causes KVM to mishandle #NPFs with that collide with the host's C-bit. Although the APM doesn't explicitly state that the C-bit is not reserved for non-SEV, Tom Lendacky confirmed that the following snippet about the effective reduction due to the C-bit does indeed apply only to SEV guests. Note that because guest physical addresses are always translated through the nested page tables, the size of the guest physical address space is not impacted by any physical address space reduction indicated in CPUID 8000_001F[EBX]. If the C-bit is a physical address bit however, the guest physical address space is effectively reduced by 1 bit. And for SEV guests, the APM clearly states that the bit is dropped before walking the nested page tables. If the C-bit is an address bit, this bit is masked from the guest physical address when it is translated through the nested page tables. Consequently, the hypervisor does not need to be aware of which pages the guest has chosen to mark private. Note, the bogus C-bit clearing was removed from legacy #PF handler in commit 6d1b867d0456 ("KVM: SVM: Don't strip the C-bit from CR2 on #PF interception"). Fixes: 0ede79e13224 ("KVM: SVM: Clear C-bit from the page fault address") Cc: Peter Gonda Cc: Brijesh Singh Cc: Tom Lendacky Cc: stable@vger.kernel.org Signed-off-by: Sean Christopherson Message-Id: <20210625020354.431829-3-seanjc@google.com> Signed-off-by: Paolo Bonzini --- arch/x86/kvm/svm/svm.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/x86/kvm/svm/svm.c b/arch/x86/kvm/svm/svm.c index 8834822c00cdcc..ca5614a48b2129 100644 --- a/arch/x86/kvm/svm/svm.c +++ b/arch/x86/kvm/svm/svm.c @@ -1923,7 +1923,7 @@ static int npf_interception(struct kvm_vcpu *vcpu) { struct vcpu_svm *svm = to_svm(vcpu); - u64 fault_address = __sme_clr(svm->vmcb->control.exit_info_2); + u64 fault_address = svm->vmcb->control.exit_info_2; u64 error_code = svm->vmcb->control.exit_info_1; trace_kvm_page_fault(fault_address, error_code); From 23fa2e46a5556f787ce2ea1a315d3ab93cced204 Mon Sep 17 00:00:00 2001 From: Kefeng Wang Date: Sat, 26 Jun 2021 15:03:04 +0800 Subject: [PATCH 350/714] KVM: mmio: Fix use-after-free Read in kvm_vm_ioctl_unregister_coalesced_mmio BUG: KASAN: use-after-free in kvm_vm_ioctl_unregister_coalesced_mmio+0x7c/0x1ec arch/arm64/kvm/../../../virt/kvm/coalesced_mmio.c:183 Read of size 8 at addr ffff0000c03a2500 by task syz-executor083/4269 CPU: 5 PID: 4269 Comm: syz-executor083 Not tainted 5.10.0 #7 Hardware name: linux,dummy-virt (DT) Call trace: dump_backtrace+0x0/0x2d0 arch/arm64/kernel/stacktrace.c:132 show_stack+0x28/0x34 arch/arm64/kernel/stacktrace.c:196 __dump_stack lib/dump_stack.c:77 [inline] dump_stack+0x110/0x164 lib/dump_stack.c:118 print_address_description+0x78/0x5c8 mm/kasan/report.c:385 __kasan_report mm/kasan/report.c:545 [inline] kasan_report+0x148/0x1e4 mm/kasan/report.c:562 check_memory_region_inline mm/kasan/generic.c:183 [inline] __asan_load8+0xb4/0xbc mm/kasan/generic.c:252 kvm_vm_ioctl_unregister_coalesced_mmio+0x7c/0x1ec arch/arm64/kvm/../../../virt/kvm/coalesced_mmio.c:183 kvm_vm_ioctl+0xe30/0x14c4 arch/arm64/kvm/../../../virt/kvm/kvm_main.c:3755 vfs_ioctl fs/ioctl.c:48 [inline] __do_sys_ioctl fs/ioctl.c:753 [inline] __se_sys_ioctl fs/ioctl.c:739 [inline] __arm64_sys_ioctl+0xf88/0x131c fs/ioctl.c:739 __invoke_syscall arch/arm64/kernel/syscall.c:36 [inline] invoke_syscall arch/arm64/kernel/syscall.c:48 [inline] el0_svc_common arch/arm64/kernel/syscall.c:158 [inline] do_el0_svc+0x120/0x290 arch/arm64/kernel/syscall.c:220 el0_svc+0x1c/0x28 arch/arm64/kernel/entry-common.c:367 el0_sync_handler+0x98/0x170 arch/arm64/kernel/entry-common.c:383 el0_sync+0x140/0x180 arch/arm64/kernel/entry.S:670 Allocated by task 4269: stack_trace_save+0x80/0xb8 kernel/stacktrace.c:121 kasan_save_stack mm/kasan/common.c:48 [inline] kasan_set_track mm/kasan/common.c:56 [inline] __kasan_kmalloc+0xdc/0x120 mm/kasan/common.c:461 kasan_kmalloc+0xc/0x14 mm/kasan/common.c:475 kmem_cache_alloc_trace include/linux/slab.h:450 [inline] kmalloc include/linux/slab.h:552 [inline] kzalloc include/linux/slab.h:664 [inline] kvm_vm_ioctl_register_coalesced_mmio+0x78/0x1cc arch/arm64/kvm/../../../virt/kvm/coalesced_mmio.c:146 kvm_vm_ioctl+0x7e8/0x14c4 arch/arm64/kvm/../../../virt/kvm/kvm_main.c:3746 vfs_ioctl fs/ioctl.c:48 [inline] __do_sys_ioctl fs/ioctl.c:753 [inline] __se_sys_ioctl fs/ioctl.c:739 [inline] __arm64_sys_ioctl+0xf88/0x131c fs/ioctl.c:739 __invoke_syscall arch/arm64/kernel/syscall.c:36 [inline] invoke_syscall arch/arm64/kernel/syscall.c:48 [inline] el0_svc_common arch/arm64/kernel/syscall.c:158 [inline] do_el0_svc+0x120/0x290 arch/arm64/kernel/syscall.c:220 el0_svc+0x1c/0x28 arch/arm64/kernel/entry-common.c:367 el0_sync_handler+0x98/0x170 arch/arm64/kernel/entry-common.c:383 el0_sync+0x140/0x180 arch/arm64/kernel/entry.S:670 Freed by task 4269: stack_trace_save+0x80/0xb8 kernel/stacktrace.c:121 kasan_save_stack mm/kasan/common.c:48 [inline] kasan_set_track+0x38/0x6c mm/kasan/common.c:56 kasan_set_free_info+0x20/0x40 mm/kasan/generic.c:355 __kasan_slab_free+0x124/0x150 mm/kasan/common.c:422 kasan_slab_free+0x10/0x1c mm/kasan/common.c:431 slab_free_hook mm/slub.c:1544 [inline] slab_free_freelist_hook mm/slub.c:1577 [inline] slab_free mm/slub.c:3142 [inline] kfree+0x104/0x38c mm/slub.c:4124 coalesced_mmio_destructor+0x94/0xa4 arch/arm64/kvm/../../../virt/kvm/coalesced_mmio.c:102 kvm_iodevice_destructor include/kvm/iodev.h:61 [inline] kvm_io_bus_unregister_dev+0x248/0x280 arch/arm64/kvm/../../../virt/kvm/kvm_main.c:4374 kvm_vm_ioctl_unregister_coalesced_mmio+0x158/0x1ec arch/arm64/kvm/../../../virt/kvm/coalesced_mmio.c:186 kvm_vm_ioctl+0xe30/0x14c4 arch/arm64/kvm/../../../virt/kvm/kvm_main.c:3755 vfs_ioctl fs/ioctl.c:48 [inline] __do_sys_ioctl fs/ioctl.c:753 [inline] __se_sys_ioctl fs/ioctl.c:739 [inline] __arm64_sys_ioctl+0xf88/0x131c fs/ioctl.c:739 __invoke_syscall arch/arm64/kernel/syscall.c:36 [inline] invoke_syscall arch/arm64/kernel/syscall.c:48 [inline] el0_svc_common arch/arm64/kernel/syscall.c:158 [inline] do_el0_svc+0x120/0x290 arch/arm64/kernel/syscall.c:220 el0_svc+0x1c/0x28 arch/arm64/kernel/entry-common.c:367 el0_sync_handler+0x98/0x170 arch/arm64/kernel/entry-common.c:383 el0_sync+0x140/0x180 arch/arm64/kernel/entry.S:670 If kvm_io_bus_unregister_dev() return -ENOMEM, we already call kvm_iodevice_destructor() inside this function to delete 'struct kvm_coalesced_mmio_dev *dev' from list and free the dev, but kvm_iodevice_destructor() is called again, it will lead the above issue. Let's check the the return value of kvm_io_bus_unregister_dev(), only call kvm_iodevice_destructor() if the return value is 0. Cc: Paolo Bonzini Cc: kvm@vger.kernel.org Reported-by: Hulk Robot Signed-off-by: Kefeng Wang Message-Id: <20210626070304.143456-1-wangkefeng.wang@huawei.com> Cc: stable@vger.kernel.org Fixes: 5d3c4c79384a ("KVM: Stop looking for coalesced MMIO zones if the bus is destroyed", 2021-04-20) Signed-off-by: Paolo Bonzini --- virt/kvm/coalesced_mmio.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/virt/kvm/coalesced_mmio.c b/virt/kvm/coalesced_mmio.c index f08f5e82460b1d..0be80c213f7f25 100644 --- a/virt/kvm/coalesced_mmio.c +++ b/virt/kvm/coalesced_mmio.c @@ -186,7 +186,6 @@ int kvm_vm_ioctl_unregister_coalesced_mmio(struct kvm *kvm, coalesced_mmio_in_range(dev, zone->addr, zone->size)) { r = kvm_io_bus_unregister_dev(kvm, zone->pio ? KVM_PIO_BUS : KVM_MMIO_BUS, &dev->dev); - kvm_iodevice_destructor(&dev->dev); /* * On failure, unregister destroys all devices on the @@ -196,6 +195,7 @@ int kvm_vm_ioctl_unregister_coalesced_mmio(struct kvm *kvm, */ if (r) break; + kvm_iodevice_destructor(&dev->dev); } } From 7234c362ccb3c2228f06f19f93b132de9cfa7ae4 Mon Sep 17 00:00:00 2001 From: Like Xu Date: Mon, 28 Jun 2021 15:43:54 +0800 Subject: [PATCH 351/714] KVM: x86/pmu: Clear anythread deprecated bit when 0xa leaf is unsupported on the SVM The AMD platform does not support the functions Ah CPUID leaf. The returned results for this entry should all remain zero just like the native does: AMD host: 0x0000000a 0x00: eax=0x00000000 ebx=0x00000000 ecx=0x00000000 edx=0x00000000 (uncanny) AMD guest: 0x0000000a 0x00: eax=0x00000000 ebx=0x00000000 ecx=0x00000000 edx=0x00008000 Fixes: cadbaa039b99 ("perf/x86/intel: Make anythread filter support conditional") Signed-off-by: Like Xu Message-Id: <20210628074354.33848-1-likexu@tencent.com> Signed-off-by: Paolo Bonzini --- arch/x86/kvm/cpuid.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/arch/x86/kvm/cpuid.c b/arch/x86/kvm/cpuid.c index ca7866d63e982d..739be5da3bca78 100644 --- a/arch/x86/kvm/cpuid.c +++ b/arch/x86/kvm/cpuid.c @@ -765,7 +765,8 @@ static inline int __do_cpuid_func(struct kvm_cpuid_array *array, u32 function) edx.split.num_counters_fixed = min(cap.num_counters_fixed, MAX_FIXED_COUNTERS); edx.split.bit_width_fixed = cap.bit_width_fixed; - edx.split.anythread_deprecated = 1; + if (cap.version) + edx.split.anythread_deprecated = 1; edx.split.reserved1 = 0; edx.split.reserved2 = 0; From 3b0462726e7ef281c35a7a4ae33e93ee2bc9975b Mon Sep 17 00:00:00 2001 From: Christian Brauner Date: Wed, 14 Jul 2021 15:47:49 +0200 Subject: [PATCH 352/714] cgroup: verify that source is a string The following sequence can be used to trigger a UAF: int fscontext_fd = fsopen("cgroup"); int fd_null = open("/dev/null, O_RDONLY); int fsconfig(fscontext_fd, FSCONFIG_SET_FD, "source", fd_null); close_range(3, ~0U, 0); The cgroup v1 specific fs parser expects a string for the "source" parameter. However, it is perfectly legitimate to e.g. specify a file descriptor for the "source" parameter. The fs parser doesn't know what a filesystem allows there. So it's a bug to assume that "source" is always of type fs_value_is_string when it can reasonably also be fs_value_is_file. This assumption in the cgroup code causes a UAF because struct fs_parameter uses a union for the actual value. Access to that union is guarded by the param->type member. Since the cgroup paramter parser didn't check param->type but unconditionally moved param->string into fc->source a close on the fscontext_fd would trigger a UAF during put_fs_context() which frees fc->source thereby freeing the file stashed in param->file causing a UAF during a close of the fd_null. Fix this by verifying that param->type is actually a string and report an error if not. In follow up patches I'll add a new generic helper that can be used here and by other filesystems instead of this error-prone copy-pasta fix. But fixing it in here first makes backporting a it to stable a lot easier. Fixes: 8d2451f4994f ("cgroup1: switch to option-by-option parsing") Reported-by: syzbot+283ce5a46486d6acdbaf@syzkaller.appspotmail.com Cc: Christoph Hellwig Cc: Alexander Viro Cc: Dmitry Vyukov Cc: Cc: syzkaller-bugs Signed-off-by: Christian Brauner Signed-off-by: Linus Torvalds --- kernel/cgroup/cgroup-v1.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/kernel/cgroup/cgroup-v1.c b/kernel/cgroup/cgroup-v1.c index ee93b6e8958746..527917c0b30be4 100644 --- a/kernel/cgroup/cgroup-v1.c +++ b/kernel/cgroup/cgroup-v1.c @@ -912,6 +912,8 @@ int cgroup1_parse_param(struct fs_context *fc, struct fs_parameter *param) opt = fs_parse(fc, cgroup1_fs_parameters, param, &result); if (opt == -ENOPARAM) { if (strcmp(param->key, "source") == 0) { + if (param->type != fs_value_is_string) + return invalf(fc, "Non-string source"); if (fc->source) return invalf(fc, "Multiple sources not supported"); fc->source = param->string; From d1d488d813703618f0dd93f0e4c4a05928114aa8 Mon Sep 17 00:00:00 2001 From: Christian Brauner Date: Wed, 14 Jul 2021 15:47:50 +0200 Subject: [PATCH 353/714] fs: add vfs_parse_fs_param_source() helper Add a simple helper that filesystems can use in their parameter parser to parse the "source" parameter. A few places open-coded this function and that already caused a bug in the cgroup v1 parser that we fixed. Let's make it harder to get this wrong by introducing a helper which performs all necessary checks. Link: https://syzkaller.appspot.com/bug?id=6312526aba5beae046fdae8f00399f87aab48b12 Cc: Christoph Hellwig Cc: Alexander Viro Cc: Dmitry Vyukov Signed-off-by: Christian Brauner Signed-off-by: Linus Torvalds --- fs/fs_context.c | 54 +++++++++++++++++++++++++------------- include/linux/fs_context.h | 2 ++ kernel/cgroup/cgroup-v1.c | 14 ++++------ 3 files changed, 43 insertions(+), 27 deletions(-) diff --git a/fs/fs_context.c b/fs/fs_context.c index 2834d1afa6e805..de1985eae535f6 100644 --- a/fs/fs_context.c +++ b/fs/fs_context.c @@ -79,6 +79,35 @@ static int vfs_parse_sb_flag(struct fs_context *fc, const char *key) return -ENOPARAM; } +/** + * vfs_parse_fs_param_source - Handle setting "source" via parameter + * @fc: The filesystem context to modify + * @param: The parameter + * + * This is a simple helper for filesystems to verify that the "source" they + * accept is sane. + * + * Returns 0 on success, -ENOPARAM if this is not "source" parameter, and + * -EINVAL otherwise. In the event of failure, supplementary error information + * is logged. + */ +int vfs_parse_fs_param_source(struct fs_context *fc, struct fs_parameter *param) +{ + if (strcmp(param->key, "source") != 0) + return -ENOPARAM; + + if (param->type != fs_value_is_string) + return invalf(fc, "Non-string source"); + + if (fc->source) + return invalf(fc, "Multiple sources"); + + fc->source = param->string; + param->string = NULL; + return 0; +} +EXPORT_SYMBOL(vfs_parse_fs_param_source); + /** * vfs_parse_fs_param - Add a single parameter to a superblock config * @fc: The filesystem context to modify @@ -122,15 +151,9 @@ int vfs_parse_fs_param(struct fs_context *fc, struct fs_parameter *param) /* If the filesystem doesn't take any arguments, give it the * default handling of source. */ - if (strcmp(param->key, "source") == 0) { - if (param->type != fs_value_is_string) - return invalf(fc, "VFS: Non-string source"); - if (fc->source) - return invalf(fc, "VFS: Multiple sources"); - fc->source = param->string; - param->string = NULL; - return 0; - } + ret = vfs_parse_fs_param_source(fc, param); + if (ret != -ENOPARAM) + return ret; return invalf(fc, "%s: Unknown parameter '%s'", fc->fs_type->name, param->key); @@ -504,16 +527,11 @@ static int legacy_parse_param(struct fs_context *fc, struct fs_parameter *param) struct legacy_fs_context *ctx = fc->fs_private; unsigned int size = ctx->data_size; size_t len = 0; + int ret; - if (strcmp(param->key, "source") == 0) { - if (param->type != fs_value_is_string) - return invalf(fc, "VFS: Legacy: Non-string source"); - if (fc->source) - return invalf(fc, "VFS: Legacy: Multiple sources"); - fc->source = param->string; - param->string = NULL; - return 0; - } + ret = vfs_parse_fs_param_source(fc, param); + if (ret != -ENOPARAM) + return ret; if (ctx->param_type == LEGACY_FS_MONOLITHIC_PARAMS) return invalf(fc, "VFS: Legacy: Can't mix monolithic and individual options"); diff --git a/include/linux/fs_context.h b/include/linux/fs_context.h index 37e1e8f7f08da3..e2bc16300c8283 100644 --- a/include/linux/fs_context.h +++ b/include/linux/fs_context.h @@ -139,6 +139,8 @@ extern int vfs_parse_fs_string(struct fs_context *fc, const char *key, extern int generic_parse_monolithic(struct fs_context *fc, void *data); extern int vfs_get_tree(struct fs_context *fc); extern void put_fs_context(struct fs_context *fc); +extern int vfs_parse_fs_param_source(struct fs_context *fc, + struct fs_parameter *param); /* * sget() wrappers to be called from the ->get_tree() op. diff --git a/kernel/cgroup/cgroup-v1.c b/kernel/cgroup/cgroup-v1.c index 527917c0b30be4..8d6bf56ed77a8d 100644 --- a/kernel/cgroup/cgroup-v1.c +++ b/kernel/cgroup/cgroup-v1.c @@ -911,15 +911,11 @@ int cgroup1_parse_param(struct fs_context *fc, struct fs_parameter *param) opt = fs_parse(fc, cgroup1_fs_parameters, param, &result); if (opt == -ENOPARAM) { - if (strcmp(param->key, "source") == 0) { - if (param->type != fs_value_is_string) - return invalf(fc, "Non-string source"); - if (fc->source) - return invalf(fc, "Multiple sources not supported"); - fc->source = param->string; - param->string = NULL; - return 0; - } + int ret; + + ret = vfs_parse_fs_param_source(fc, param); + if (ret != -ENOPARAM) + return ret; for_each_subsys(ss, i) { if (strcmp(param->key, ss->legacy_name)) continue; From edb25572fc7058db5a98223e11d2d50497178553 Mon Sep 17 00:00:00 2001 From: Stephen Boyd Date: Wed, 23 Jun 2021 00:50:01 -0700 Subject: [PATCH 354/714] mmc: core: Use kref in place of struct mmc_blk_data::usage Ulf reported the following KASAN splat after adding some manual hacks into mmc-utils[1]. DEBUG: mmc_blk_open: Let's sleep for 10s.. mmc1: card 0007 removed BUG: KASAN: use-after-free in mmc_blk_get+0x58/0xb8 Read of size 4 at addr ffff00000a394a28 by task mmc/180 CPU: 2 PID: 180 Comm: mmc Not tainted 5.10.0-rc4-00069-gcc758c8c7127-dirty #5 Hardware name: Qualcomm Technologies, Inc. APQ 8016 SBC (DT) Call trace: dump_backtrace+0x0/0x2b4 show_stack+0x18/0x6c dump_stack+0xfc/0x168 print_address_description.constprop.0+0x6c/0x488 kasan_report+0x118/0x210 __asan_load4+0x94/0xd0 mmc_blk_get+0x58/0xb8 mmc_blk_open+0x7c/0xdc __blkdev_get+0x3b4/0x964 blkdev_get+0x64/0x100 blkdev_open+0xe8/0x104 do_dentry_open+0x234/0x61c vfs_open+0x54/0x64 path_openat+0xe04/0x1584 do_filp_open+0xe8/0x1e4 do_sys_openat2+0x120/0x230 __arm64_sys_openat+0xf0/0x15c el0_svc_common.constprop.0+0xac/0x234 do_el0_svc+0x84/0xa0 el0_sync_handler+0x264/0x270 el0_sync+0x174/0x180 Allocated by task 33: stack_trace_save+0x9c/0xdc kasan_save_stack+0x28/0x60 __kasan_kmalloc.constprop.0+0xc8/0xf0 kasan_kmalloc+0x10/0x20 mmc_blk_alloc_req+0x94/0x4b0 mmc_blk_probe+0x2d4/0xaa4 mmc_bus_probe+0x34/0x4c really_probe+0x148/0x6e0 driver_probe_device+0x78/0xec __device_attach_driver+0x108/0x16c bus_for_each_drv+0xf4/0x15c __device_attach+0x168/0x240 device_initial_probe+0x14/0x20 bus_probe_device+0xec/0x100 device_add+0x55c/0xaf0 mmc_add_card+0x288/0x380 mmc_attach_sd+0x18c/0x22c mmc_rescan+0x444/0x4f0 process_one_work+0x3b8/0x650 worker_thread+0xa0/0x724 kthread+0x218/0x220 ret_from_fork+0x10/0x38 Freed by task 33: stack_trace_save+0x9c/0xdc kasan_save_stack+0x28/0x60 kasan_set_track+0x28/0x40 kasan_set_free_info+0x24/0x4c __kasan_slab_free+0x100/0x180 kasan_slab_free+0x14/0x20 kfree+0xb8/0x46c mmc_blk_put+0xe4/0x11c mmc_blk_remove_req.part.0+0x6c/0xe4 mmc_blk_remove+0x368/0x370 mmc_bus_remove+0x34/0x50 __device_release_driver+0x228/0x31c device_release_driver+0x2c/0x44 bus_remove_device+0x1e4/0x200 device_del+0x2b0/0x770 mmc_remove_card+0xf0/0x150 mmc_sd_detect+0x9c/0x150 mmc_rescan+0x110/0x4f0 process_one_work+0x3b8/0x650 worker_thread+0xa0/0x724 kthread+0x218/0x220 ret_from_fork+0x10/0x38 The buggy address belongs to the object at ffff00000a394800 which belongs to the cache kmalloc-1k of size 1024 The buggy address is located 552 bytes inside of 1024-byte region [ffff00000a394800, ffff00000a394c00) The buggy address belongs to the page: page:00000000ff84ed53 refcount:1 mapcount:0 mapping:0000000000000000 index:0x0 pfn:0x8a390 head:00000000ff84ed53 order:3 compound_mapcount:0 compound_pincount:0 flags: 0x3fffc0000010200(slab|head) raw: 03fffc0000010200 dead000000000100 dead000000000122 ffff000009f03800 raw: 0000000000000000 0000000000100010 00000001ffffffff 0000000000000000 page dumped because: kasan: bad access detected Memory state around the buggy address: ffff00000a394900: fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb ffff00000a394980: fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb >ffff00000a394a00: fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb ^ ffff00000a394a80: fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb ffff00000a394b00: fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb Looking closer at the problem, it looks like a classic dangling pointer bug. The 'struct mmc_blk_data' that is used after being freed in mmc_blk_put() is stashed away in 'md->disk->private_data' via mmc_blk_alloc_req() but used in mmc_blk_get() because the 'usage' count isn't properly aligned with the lifetime of the pointer. You'd expect the 'usage' member to be in sync with the kfree(), and it mostly is, except that mmc_blk_get() needs to dereference the potentially freed memory storage for the 'struct mmc_blk_data' stashed away in the private_data member to look at 'usage' before it actually figures out if it wants to consider it a valid pointer or not. That's not going to work if the freed memory has been overwritten by something else after the free, and KASAN rightly complains here. To fix the immediate problem, let's set the private_data member to NULL in mmc_blk_put() so that mmc_blk_get() can consider the object "on the way out" if the pointer is NULL and not even try to look at 'usage' if the object isn't going to be around much longer. With that set to NULL on the last mmc_blk_put(), optimize the get path further and use a kref underneath the 'open_lock' mutex to only up the reference count if it's non-zero, i.e. alive, and otherwise make mmc_blk_get() return NULL, without actually testing the reference count if we're in the process of removing the object from the system. Finally, tighten the locking region on the put side to only be around the parts that are removing the 'mmc_blk_data' from the system and publishing that fact to the gendisk and then drop the lock as soon as we can to avoid holding the lock around code that doesn't need it. This fixes the KASAN issue. Cc: Matthias Schiffer Cc: Sujit Kautkar Cc: Zubin Mithra Reported-by: Ulf Hansson Link: https://lore.kernel.org/linux-mmc/CAPDyKFryT63Jc7+DXWSpAC19qpZRqFr1orxwYGMuSqx247O8cQ@mail.gmail.com/ [1] Signed-off-by: Stephen Boyd Link: https://lore.kernel.org/r/20210623075002.1746924-2-swboyd@chromium.org Cc: stable@vger.kernel.org Signed-off-by: Ulf Hansson --- drivers/mmc/core/block.c | 35 +++++++++++++++++++++-------------- 1 file changed, 21 insertions(+), 14 deletions(-) diff --git a/drivers/mmc/core/block.c b/drivers/mmc/core/block.c index 9890a1532cb0b2..ce8aed5629295d 100644 --- a/drivers/mmc/core/block.c +++ b/drivers/mmc/core/block.c @@ -28,6 +28,7 @@ #include #include #include +#include #include #include #include @@ -111,7 +112,7 @@ struct mmc_blk_data { #define MMC_BLK_CMD23 (1 << 0) /* Can do SET_BLOCK_COUNT for multiblock */ #define MMC_BLK_REL_WR (1 << 1) /* MMC Reliable write support */ - unsigned int usage; + struct kref kref; unsigned int read_only; unsigned int part_type; unsigned int reset_done; @@ -181,10 +182,8 @@ static struct mmc_blk_data *mmc_blk_get(struct gendisk *disk) mutex_lock(&open_lock); md = disk->private_data; - if (md && md->usage == 0) + if (md && !kref_get_unless_zero(&md->kref)) md = NULL; - if (md) - md->usage++; mutex_unlock(&open_lock); return md; @@ -196,18 +195,25 @@ static inline int mmc_get_devidx(struct gendisk *disk) return devidx; } -static void mmc_blk_put(struct mmc_blk_data *md) +static void mmc_blk_kref_release(struct kref *ref) { - mutex_lock(&open_lock); - md->usage--; - if (md->usage == 0) { - int devidx = mmc_get_devidx(md->disk); + struct mmc_blk_data *md = container_of(ref, struct mmc_blk_data, kref); + int devidx; - ida_simple_remove(&mmc_blk_ida, devidx); - put_disk(md->disk); - kfree(md); - } + devidx = mmc_get_devidx(md->disk); + ida_simple_remove(&mmc_blk_ida, devidx); + + mutex_lock(&open_lock); + md->disk->private_data = NULL; mutex_unlock(&open_lock); + + put_disk(md->disk); + kfree(md); +} + +static void mmc_blk_put(struct mmc_blk_data *md) +{ + kref_put(&md->kref, mmc_blk_kref_release); } static ssize_t power_ro_lock_show(struct device *dev, @@ -2327,7 +2333,8 @@ static struct mmc_blk_data *mmc_blk_alloc_req(struct mmc_card *card, INIT_LIST_HEAD(&md->part); INIT_LIST_HEAD(&md->rpmbs); - md->usage = 1; + kref_init(&md->kref); + md->queue.blkdata = md; md->disk->major = MMC_BLOCK_MAJOR; From 10252bae863d09b9648bed2e035572d207200ca1 Mon Sep 17 00:00:00 2001 From: Stephen Boyd Date: Wed, 23 Jun 2021 00:50:02 -0700 Subject: [PATCH 355/714] mmc: core: Don't allocate IDA for OF aliases There's a chance that the IDA allocated in mmc_alloc_host() is not freed for some time because it's freed as part of a class' release function (see mmc_host_classdev_release() where the IDA is freed). If another thread is holding a reference to the class, then only once all balancing device_put() calls (in turn calling kobject_put()) have been made will the IDA be released and usable again. Normally this isn't a problem because the kobject is released before anything else that may want to use the same number tries to again, but with CONFIG_DEBUG_KOBJECT_RELEASE=y and OF aliases it becomes pretty easy to try to allocate an alias from the IDA twice while the first time it was allocated is still pending a call to ida_simple_remove(). It's also possible to trigger it by using CONFIG_DEBUG_KOBJECT_RELEASE and probe defering a driver at boot that calls mmc_alloc_host() before trying to get resources that may defer likes clks or regulators. Instead of allocating from the IDA in this scenario, let's just skip it if we know this is an OF alias. The number is already "claimed" and devices that aren't using OF aliases won't try to use the claimed numbers anyway (see mmc_first_nonreserved_index()). This should avoid any issues with mmc_alloc_host() returning failures from the ida_simple_get() in the case that we're using an OF alias. Cc: Matthias Schiffer Cc: Sujit Kautkar Reported-by: Zubin Mithra Fixes: fa2d0aa96941 ("mmc: core: Allow setting slot index via device tree alias") Signed-off-by: Stephen Boyd Link: https://lore.kernel.org/r/20210623075002.1746924-3-swboyd@chromium.org Cc: stable@vger.kernel.org Signed-off-by: Ulf Hansson --- drivers/mmc/core/host.c | 20 ++++++++++---------- 1 file changed, 10 insertions(+), 10 deletions(-) diff --git a/drivers/mmc/core/host.c b/drivers/mmc/core/host.c index eda4a1892c33c5..0475d96047c408 100644 --- a/drivers/mmc/core/host.c +++ b/drivers/mmc/core/host.c @@ -75,7 +75,8 @@ static void mmc_host_classdev_release(struct device *dev) { struct mmc_host *host = cls_dev_to_mmc_host(dev); wakeup_source_unregister(host->ws); - ida_simple_remove(&mmc_host_ida, host->index); + if (of_alias_get_id(host->parent->of_node, "mmc") < 0) + ida_simple_remove(&mmc_host_ida, host->index); kfree(host); } @@ -502,7 +503,7 @@ static int mmc_first_nonreserved_index(void) */ struct mmc_host *mmc_alloc_host(int extra, struct device *dev) { - int err; + int index; struct mmc_host *host; int alias_id, min_idx, max_idx; @@ -515,20 +516,19 @@ struct mmc_host *mmc_alloc_host(int extra, struct device *dev) alias_id = of_alias_get_id(dev->of_node, "mmc"); if (alias_id >= 0) { - min_idx = alias_id; - max_idx = alias_id + 1; + index = alias_id; } else { min_idx = mmc_first_nonreserved_index(); max_idx = 0; - } - err = ida_simple_get(&mmc_host_ida, min_idx, max_idx, GFP_KERNEL); - if (err < 0) { - kfree(host); - return NULL; + index = ida_simple_get(&mmc_host_ida, min_idx, max_idx, GFP_KERNEL); + if (index < 0) { + kfree(host); + return NULL; + } } - host->index = err; + host->index = index; dev_set_name(&host->class_dev, "mmc%d", host->index); host->ws = wakeup_source_register(NULL, dev_name(&host->class_dev)); From fa2c02e5798c17c89cbb3135940086ebe07e5c9f Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Wed, 14 Jul 2021 14:28:02 -0300 Subject: [PATCH 356/714] tools headers: Remove broken definition of __LITTLE_ENDIAN The linux/kconfig.h file was copied from the kernel but the line where with the generated/autoconf.h include from where the CONFIG_ entries would come from was deleted, as tools/ build system don't create that file, so we ended up always defining just __LITTLE_ENDIAN as CONFIG_CPU_BIG_ENDIAN was nowhere to be found. This in turn ended up breaking the build in some systems where __LITTLE_ENDIAN was already defined, such as the androind NDK. So just ditch that block that depends on the CONFIG_CPU_BIG_ENDIAN define. The kconfig.h file was copied just to get IS_ENABLED() and a 'make -C tools/all' doesn't breaks with this removal. Fixes: 93281c4a96572a34 ("x86/insn: Add an insn_decode() API") Cc: Adrian Hunter Cc: Borislav Petkov Cc: Jiri Olsa Cc: Namhyung Kim Link: http://lore.kernel.org/lkml/YO8hK7lqJcIWuBzx@kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/include/linux/kconfig.h | 6 ------ 1 file changed, 6 deletions(-) diff --git a/tools/include/linux/kconfig.h b/tools/include/linux/kconfig.h index 1555a0c4f34514..13b86bd3b7461d 100644 --- a/tools/include/linux/kconfig.h +++ b/tools/include/linux/kconfig.h @@ -4,12 +4,6 @@ /* CONFIG_CC_VERSION_TEXT (Do not delete this comment. See help in Kconfig) */ -#ifdef CONFIG_CPU_BIG_ENDIAN -#define __BIG_ENDIAN 4321 -#else -#define __LITTLE_ENDIAN 1234 -#endif - #define __ARG_PLACEHOLDER_1 0, #define __take_second_arg(__ignored, val, ...) val From 83d1fc92d4cdd1ea2d229347ddf11ea2aa751059 Mon Sep 17 00:00:00 2001 From: James Clark Date: Thu, 24 Jun 2021 17:43:02 +0100 Subject: [PATCH 357/714] perf cs-etm: Split Coresight decode by aux records Populate the auxtrace queues using AUX records rather than whole auxtrace buffers so that the decoder is reset between each aux record. This is similar to the auxtrace_queues__process_index() -> auxtrace_queues__add_indexed_event() flow where perf_session__peek_event() is used to read AUXTRACE events out of random positions in the file based on the auxtrace index. But now we loop over all PERF_RECORD_AUX events instead of AUXTRACE buffers. For each PERF_RECORD_AUX event, we find the corresponding AUXTRACE buffer using the index, and add a fragment of that buffer to the auxtrace queues. No other changes to decoding were made, apart from populating the auxtrace queues. The result of decoding is identical to before, except in cases where decoding failed completely, due to not resetting the decoder. The reason for this change is because AUX records are emitted any time tracing is disabled, for example when the process is scheduled out. Because ETM was disabled and enabled again, the decoder also needs to be reset to force the search for a sync packet. Otherwise there would be fatal decoding errors. Testing ======= Testing was done with the following script, to diff the decoding results between the patched and un-patched versions of perf: #!/bin/bash set -ex $1 script -i $3 $4 > split.script $2 script -i $3 $4 > default.script diff split.script default.script | head -n 20 And it was run like this, with various itrace options depending on the quantity of synthesised events: compare.sh ./perf-patched ./perf-default perf-per-cpu-2-threads.data --itrace=i100000ns No changes in output were observed in the following scenarios: * Simple per-cpu perf record -e cs_etm/@tmc_etr0/u top * Per-thread, single thread perf record -e cs_etm/@tmc_etr0/u --per-thread ./threads_C * Per-thread multiple threads (but only one thread collected data): perf record -e cs_etm/@tmc_etr0/u --per-thread --pid 4596,4597 * Per-thread multiple threads (both threads collected data): perf record -e cs_etm/@tmc_etr0/u --per-thread --pid 4596,4597 * Per-cpu explicit threads: perf record -e cs_etm/@tmc_etr0/u --pid 853,854 * System-wide (per-cpu): perf record -e cs_etm/@tmc_etr0/u -a * No data collected (no aux buffers) Can happen with any command when run for a short period * Containing truncated records Can happen with any command * Containing aux records with 0 size Can happen with any command * Snapshot mode (various files with and without buffer wrap) perf record -e cs_etm/@tmc_etr0/u -a --snapshot Some differences were observed in the following scenario: * Snapshot mode (with duplicate buffers) perf record -e cs_etm/@tmc_etr0/u -a --snapshot Fewer samples are generated in snapshot mode if duplicate buffers were gathered because buffers with the same offset are now only added once. This gives different, but more correct results and no duplicate data is decoded any more. Signed-off-by: James Clark Reviewed-by: Mathieu Poirier Tested-by: Leo Yan Cc: Al Grant Cc: Alexander Shishkin Cc: Anshuman Khandual Cc: Branislav Rankov Cc: Denis Nikitin Cc: Jiri Olsa Cc: John Garry Cc: Mark Rutland Cc: Mike Leach Cc: Namhyung Kim Cc: Suzuki Poulouse Cc: Will Deacon Cc: coresight@lists.linaro.org Cc: linux-arm-kernel@lists.infradead.org Link: http://lore.kernel.org/lkml/20210624164303.28632-2-james.clark@arm.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/cs-etm.c | 168 ++++++++++++++++++++++++++++++++++++++- 1 file changed, 167 insertions(+), 1 deletion(-) diff --git a/tools/perf/util/cs-etm.c b/tools/perf/util/cs-etm.c index 32ad92d3e454d2..22f8326547eb00 100644 --- a/tools/perf/util/cs-etm.c +++ b/tools/perf/util/cs-etm.c @@ -2683,6 +2683,172 @@ static u64 *cs_etm__create_meta_blk(u64 *buff_in, int *buff_in_offset, return metadata; } +/** + * Puts a fragment of an auxtrace buffer into the auxtrace queues based + * on the bounds of aux_event, if it matches with the buffer that's at + * file_offset. + * + * Normally, whole auxtrace buffers would be added to the queue. But we + * want to reset the decoder for every PERF_RECORD_AUX event, and the decoder + * is reset across each buffer, so splitting the buffers up in advance has + * the same effect. + */ +static int cs_etm__queue_aux_fragment(struct perf_session *session, off_t file_offset, size_t sz, + struct perf_record_aux *aux_event, struct perf_sample *sample) +{ + int err; + char buf[PERF_SAMPLE_MAX_SIZE]; + union perf_event *auxtrace_event_union; + struct perf_record_auxtrace *auxtrace_event; + union perf_event auxtrace_fragment; + __u64 aux_offset, aux_size; + + struct cs_etm_auxtrace *etm = container_of(session->auxtrace, + struct cs_etm_auxtrace, + auxtrace); + + /* + * There should be a PERF_RECORD_AUXTRACE event at the file_offset that we got + * from looping through the auxtrace index. + */ + err = perf_session__peek_event(session, file_offset, buf, + PERF_SAMPLE_MAX_SIZE, &auxtrace_event_union, NULL); + if (err) + return err; + auxtrace_event = &auxtrace_event_union->auxtrace; + if (auxtrace_event->header.type != PERF_RECORD_AUXTRACE) + return -EINVAL; + + if (auxtrace_event->header.size < sizeof(struct perf_record_auxtrace) || + auxtrace_event->header.size != sz) { + return -EINVAL; + } + + /* + * In per-thread mode, CPU is set to -1, but TID will be set instead. See + * auxtrace_mmap_params__set_idx(). Return 'not found' if neither CPU nor TID match. + */ + if ((auxtrace_event->cpu == (__u32) -1 && auxtrace_event->tid != sample->tid) || + auxtrace_event->cpu != sample->cpu) + return 1; + + if (aux_event->flags & PERF_AUX_FLAG_OVERWRITE) { + /* + * Clamp size in snapshot mode. The buffer size is clamped in + * __auxtrace_mmap__read() for snapshots, so the aux record size doesn't reflect + * the buffer size. + */ + aux_size = min(aux_event->aux_size, auxtrace_event->size); + + /* + * In this mode, the head also points to the end of the buffer so aux_offset + * needs to have the size subtracted so it points to the beginning as in normal mode + */ + aux_offset = aux_event->aux_offset - aux_size; + } else { + aux_size = aux_event->aux_size; + aux_offset = aux_event->aux_offset; + } + + if (aux_offset >= auxtrace_event->offset && + aux_offset + aux_size <= auxtrace_event->offset + auxtrace_event->size) { + /* + * If this AUX event was inside this buffer somewhere, create a new auxtrace event + * based on the sizes of the aux event, and queue that fragment. + */ + auxtrace_fragment.auxtrace = *auxtrace_event; + auxtrace_fragment.auxtrace.size = aux_size; + auxtrace_fragment.auxtrace.offset = aux_offset; + file_offset += aux_offset - auxtrace_event->offset + auxtrace_event->header.size; + + pr_debug3("CS ETM: Queue buffer size: %#"PRI_lx64" offset: %#"PRI_lx64 + " tid: %d cpu: %d\n", aux_size, aux_offset, sample->tid, sample->cpu); + return auxtrace_queues__add_event(&etm->queues, session, &auxtrace_fragment, + file_offset, NULL); + } + + /* Wasn't inside this buffer, but there were no parse errors. 1 == 'not found' */ + return 1; +} + +static int cs_etm__queue_aux_records_cb(struct perf_session *session, union perf_event *event, + u64 offset __maybe_unused, void *data __maybe_unused) +{ + struct perf_sample sample; + int ret; + struct auxtrace_index_entry *ent; + struct auxtrace_index *auxtrace_index; + struct evsel *evsel; + size_t i; + + /* Don't care about any other events, we're only queuing buffers for AUX events */ + if (event->header.type != PERF_RECORD_AUX) + return 0; + + if (event->header.size < sizeof(struct perf_record_aux)) + return -EINVAL; + + /* Truncated Aux records can have 0 size and shouldn't result in anything being queued. */ + if (!event->aux.aux_size) + return 0; + + /* + * Parse the sample, we need the sample_id_all data that comes after the event so that the + * CPU or PID can be matched to an AUXTRACE buffer's CPU or PID. + */ + evsel = evlist__event2evsel(session->evlist, event); + if (!evsel) + return -EINVAL; + ret = evsel__parse_sample(evsel, event, &sample); + if (ret) + return ret; + + /* + * Loop through the auxtrace index to find the buffer that matches up with this aux event. + */ + list_for_each_entry(auxtrace_index, &session->auxtrace_index, list) { + for (i = 0; i < auxtrace_index->nr; i++) { + ent = &auxtrace_index->entries[i]; + ret = cs_etm__queue_aux_fragment(session, ent->file_offset, + ent->sz, &event->aux, &sample); + /* + * Stop search on error or successful values. Continue search on + * 1 ('not found') + */ + if (ret != 1) + return ret; + } + } + + /* + * Couldn't find the buffer corresponding to this aux record, something went wrong. Warn but + * don't exit with an error because it will still be possible to decode other aux records. + */ + pr_err("CS ETM: Couldn't find auxtrace buffer for aux_offset: %#"PRI_lx64 + " tid: %d cpu: %d\n", event->aux.aux_offset, sample.tid, sample.cpu); + return 0; +} + +static int cs_etm__queue_aux_records(struct perf_session *session) +{ + struct auxtrace_index *index = list_first_entry_or_null(&session->auxtrace_index, + struct auxtrace_index, list); + if (index && index->nr > 0) + return perf_session__peek_events(session, session->header.data_offset, + session->header.data_size, + cs_etm__queue_aux_records_cb, NULL); + + /* + * We would get here if there are no entries in the index (either no auxtrace + * buffers or no index at all). Fail silently as there is the possibility of + * queueing them in cs_etm__process_auxtrace_event() if etm->data_queued is still + * false. + * + * In that scenario, buffers will not be split by AUX records. + */ + return 0; +} + int cs_etm__process_auxtrace_info(union perf_event *event, struct perf_session *session) { @@ -2883,7 +3049,7 @@ int cs_etm__process_auxtrace_info(union perf_event *event, if (err) goto err_delete_thread; - err = auxtrace_queues__process_index(&etm->queues, session); + err = cs_etm__queue_aux_records(session); if (err) goto err_delete_thread; From 775da83005cb61d4c213c636df9337da05714ff1 Mon Sep 17 00:00:00 2001 From: Jinzhou Su Date: Tue, 13 Jul 2021 09:26:11 +0800 Subject: [PATCH 358/714] drm/amdgpu: add another Renoir DID Add new PCI device id. Signed-off-by: Jinzhou Su Reviewed-by: Huang Rui Reviewed-by: Alex Deucher Signed-off-by: Alex Deucher Cc: stable@vger.kernel.org # 5.11.x --- drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c index 71beb0db0125da..abb928894eac0d 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c @@ -1168,6 +1168,7 @@ static const struct pci_device_id pciidlist[] = { {0x1002, 0x734F, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_NAVI14}, /* Renoir */ + {0x1002, 0x15E7, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_RENOIR|AMD_IS_APU}, {0x1002, 0x1636, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_RENOIR|AMD_IS_APU}, {0x1002, 0x1638, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_RENOIR|AMD_IS_APU}, {0x1002, 0x164C, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_RENOIR|AMD_IS_APU}, From 9c23aa51477a37f8b56c3c40192248db0663c196 Mon Sep 17 00:00:00 2001 From: Takashi Iwai Date: Wed, 14 Jul 2021 19:00:21 +0200 Subject: [PATCH 359/714] r8152: Fix potential PM refcount imbalance rtl8152_close() takes the refcount via usb_autopm_get_interface() but it doesn't release when RTL8152_UNPLUG test hits. This may lead to the imbalance of PM refcount. This patch addresses it. Link: https://bugzilla.suse.com/show_bug.cgi?id=1186194 Signed-off-by: Takashi Iwai Signed-off-by: David S. Miller --- drivers/net/usb/r8152.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/net/usb/r8152.c b/drivers/net/usb/r8152.c index 1692d3b1b6e19e..4096e20e97251d 100644 --- a/drivers/net/usb/r8152.c +++ b/drivers/net/usb/r8152.c @@ -6763,9 +6763,10 @@ static int rtl8152_close(struct net_device *netdev) tp->rtl_ops.down(tp); mutex_unlock(&tp->control); + } + if (!res) usb_autopm_put_interface(tp->intf); - } free_all_mem(tp); From 776ac63a986d211286230c4fd70f85390eabedcd Mon Sep 17 00:00:00 2001 From: Takashi Iwai Date: Wed, 14 Jul 2021 19:00:22 +0200 Subject: [PATCH 360/714] r8152: Fix a deadlock by doubly PM resume r8152 driver sets up the MAC address at reset-resume, while rtl8152_set_mac_address() has the temporary autopm get/put. This may lead to a deadlock as the PM lock has been already taken for the execution of the runtime PM callback. This patch adds the workaround to avoid the superfluous autpm when called from rtl8152_reset_resume(). Link: https://bugzilla.suse.com/show_bug.cgi?id=1186194 Signed-off-by: Takashi Iwai Signed-off-by: David S. Miller --- drivers/net/usb/r8152.c | 27 ++++++++++++++++++--------- 1 file changed, 18 insertions(+), 9 deletions(-) diff --git a/drivers/net/usb/r8152.c b/drivers/net/usb/r8152.c index 4096e20e97251d..e09b107b5c9920 100644 --- a/drivers/net/usb/r8152.c +++ b/drivers/net/usb/r8152.c @@ -1552,7 +1552,8 @@ static int rtl8152_set_speed(struct r8152 *tp, u8 autoneg, u32 speed, u8 duplex, u32 advertising); -static int rtl8152_set_mac_address(struct net_device *netdev, void *p) +static int __rtl8152_set_mac_address(struct net_device *netdev, void *p, + bool in_resume) { struct r8152 *tp = netdev_priv(netdev); struct sockaddr *addr = p; @@ -1561,9 +1562,11 @@ static int rtl8152_set_mac_address(struct net_device *netdev, void *p) if (!is_valid_ether_addr(addr->sa_data)) goto out1; - ret = usb_autopm_get_interface(tp->intf); - if (ret < 0) - goto out1; + if (!in_resume) { + ret = usb_autopm_get_interface(tp->intf); + if (ret < 0) + goto out1; + } mutex_lock(&tp->control); @@ -1575,11 +1578,17 @@ static int rtl8152_set_mac_address(struct net_device *netdev, void *p) mutex_unlock(&tp->control); - usb_autopm_put_interface(tp->intf); + if (!in_resume) + usb_autopm_put_interface(tp->intf); out1: return ret; } +static int rtl8152_set_mac_address(struct net_device *netdev, void *p) +{ + return __rtl8152_set_mac_address(netdev, p, false); +} + /* Devices containing proper chips can support a persistent * host system provided MAC address. * Examples of this are Dell TB15 and Dell WD15 docks @@ -1698,7 +1707,7 @@ static int determine_ethernet_addr(struct r8152 *tp, struct sockaddr *sa) return ret; } -static int set_ethernet_addr(struct r8152 *tp) +static int set_ethernet_addr(struct r8152 *tp, bool in_resume) { struct net_device *dev = tp->netdev; struct sockaddr sa; @@ -1711,7 +1720,7 @@ static int set_ethernet_addr(struct r8152 *tp) if (tp->version == RTL_VER_01) ether_addr_copy(dev->dev_addr, sa.sa_data); else - ret = rtl8152_set_mac_address(dev, &sa); + ret = __rtl8152_set_mac_address(dev, &sa, in_resume); return ret; } @@ -8444,7 +8453,7 @@ static int rtl8152_reset_resume(struct usb_interface *intf) clear_bit(SELECTIVE_SUSPEND, &tp->flags); tp->rtl_ops.init(tp); queue_delayed_work(system_long_wq, &tp->hw_phy_work, 0); - set_ethernet_addr(tp); + set_ethernet_addr(tp, true); return rtl8152_resume(intf); } @@ -9645,7 +9654,7 @@ static int rtl8152_probe(struct usb_interface *intf, tp->rtl_fw.retry = true; #endif queue_delayed_work(system_long_wq, &tp->hw_phy_work, 0); - set_ethernet_addr(tp); + set_ethernet_addr(tp, false); usb_set_intfdata(intf, tp); From 1a3402d93c73bf6bb4df6d7c2aac35abfc3c50e2 Mon Sep 17 00:00:00 2001 From: Frederic Weisbecker Date: Thu, 3 Jun 2021 01:15:59 +0200 Subject: [PATCH 361/714] posix-cpu-timers: Fix rearm racing against process tick Since the process wide cputime counter is started locklessly from posix_cpu_timer_rearm(), it can be concurrently stopped by operations on other timers from the same thread group, such as in the following unlucky scenario: CPU 0 CPU 1 ----- ----- timer_settime(TIMER B) posix_cpu_timer_rearm(TIMER A) cpu_clock_sample_group() (pct->timers_active already true) handle_posix_cpu_timers() check_process_timers() stop_process_timers() pct->timers_active = false arm_timer(TIMER A) tick -> run_posix_cpu_timers() // sees !pct->timers_active, ignore // our TIMER A Fix this with simply locking process wide cputime counting start and timer arm in the same block. Acked-by: Peter Zijlstra (Intel) Signed-off-by: Frederic Weisbecker Fixes: 60f2ceaa8111 ("posix-cpu-timers: Remove unnecessary locking around cpu_clock_sample_group") Cc: stable@vger.kernel.org Cc: Oleg Nesterov Cc: Thomas Gleixner Cc: Ingo Molnar Cc: Eric W. Biederman --- kernel/time/posix-cpu-timers.c | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/kernel/time/posix-cpu-timers.c b/kernel/time/posix-cpu-timers.c index 29a5e54e6e1054..517be7fd175ef3 100644 --- a/kernel/time/posix-cpu-timers.c +++ b/kernel/time/posix-cpu-timers.c @@ -991,6 +991,11 @@ static void posix_cpu_timer_rearm(struct k_itimer *timer) if (!p) goto out; + /* Protect timer list r/w in arm_timer() */ + sighand = lock_task_sighand(p, &flags); + if (unlikely(sighand == NULL)) + goto out; + /* * Fetch the current sample and update the timer's expiry time. */ @@ -1001,11 +1006,6 @@ static void posix_cpu_timer_rearm(struct k_itimer *timer) bump_cpu_timer(timer, now); - /* Protect timer list r/w in arm_timer() */ - sighand = lock_task_sighand(p, &flags); - if (unlikely(sighand == NULL)) - goto out; - /* * Now re-arm for the new expiry time. */ From aebacb7f6ca1926918734faae14d1f0b6fae5cb7 Mon Sep 17 00:00:00 2001 From: Nicolas Saenz Julienne Date: Fri, 9 Jul 2021 16:13:25 +0200 Subject: [PATCH 362/714] timers: Fix get_next_timer_interrupt() with no timers pending 31cd0e119d50 ("timers: Recalculate next timer interrupt only when necessary") subtly altered get_next_timer_interrupt()'s behaviour. The function no longer consistently returns KTIME_MAX with no timers pending. In order to decide if there are any timers pending we check whether the next expiry will happen NEXT_TIMER_MAX_DELTA jiffies from now. Unfortunately, the next expiry time and the timer base clock are no longer updated in unison. The former changes upon certain timer operations (enqueue, expire, detach), whereas the latter keeps track of jiffies as they move forward. Ultimately breaking the logic above. A simplified example: - Upon entering get_next_timer_interrupt() with: jiffies = 1 base->clk = 0; base->next_expiry = NEXT_TIMER_MAX_DELTA; 'base->next_expiry == base->clk + NEXT_TIMER_MAX_DELTA', the function returns KTIME_MAX. - 'base->clk' is updated to the jiffies value. - The next time we enter get_next_timer_interrupt(), taking into account no timer operations happened: base->clk = 1; base->next_expiry = NEXT_TIMER_MAX_DELTA; 'base->next_expiry != base->clk + NEXT_TIMER_MAX_DELTA', the function returns a valid expire time, which is incorrect. This ultimately might unnecessarily rearm sched's timer on nohz_full setups, and add latency to the system[1]. So, introduce 'base->timers_pending'[2], update it every time 'base->next_expiry' changes, and use it in get_next_timer_interrupt(). [1] See tick_nohz_stop_tick(). [2] A quick pahole check on x86_64 and arm64 shows it doesn't make 'struct timer_base' any bigger. Fixes: 31cd0e119d50 ("timers: Recalculate next timer interrupt only when necessary") Signed-off-by: Nicolas Saenz Julienne Signed-off-by: Frederic Weisbecker --- kernel/time/timer.c | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/kernel/time/timer.c b/kernel/time/timer.c index 3fadb58fc9d7b1..9eb11c2209e56b 100644 --- a/kernel/time/timer.c +++ b/kernel/time/timer.c @@ -207,6 +207,7 @@ struct timer_base { unsigned int cpu; bool next_expiry_recalc; bool is_idle; + bool timers_pending; DECLARE_BITMAP(pending_map, WHEEL_SIZE); struct hlist_head vectors[WHEEL_SIZE]; } ____cacheline_aligned; @@ -595,6 +596,7 @@ static void enqueue_timer(struct timer_base *base, struct timer_list *timer, * can reevaluate the wheel: */ base->next_expiry = bucket_expiry; + base->timers_pending = true; base->next_expiry_recalc = false; trigger_dyntick_cpu(base, timer); } @@ -1582,6 +1584,7 @@ static unsigned long __next_timer_interrupt(struct timer_base *base) } base->next_expiry_recalc = false; + base->timers_pending = !(next == base->clk + NEXT_TIMER_MAX_DELTA); return next; } @@ -1633,7 +1636,6 @@ u64 get_next_timer_interrupt(unsigned long basej, u64 basem) struct timer_base *base = this_cpu_ptr(&timer_bases[BASE_STD]); u64 expires = KTIME_MAX; unsigned long nextevt; - bool is_max_delta; /* * Pretend that there is no timer pending if the cpu is offline. @@ -1646,7 +1648,6 @@ u64 get_next_timer_interrupt(unsigned long basej, u64 basem) if (base->next_expiry_recalc) base->next_expiry = __next_timer_interrupt(base); nextevt = base->next_expiry; - is_max_delta = (nextevt == base->clk + NEXT_TIMER_MAX_DELTA); /* * We have a fresh next event. Check whether we can forward the @@ -1664,7 +1665,7 @@ u64 get_next_timer_interrupt(unsigned long basej, u64 basem) expires = basem; base->is_idle = false; } else { - if (!is_max_delta) + if (base->timers_pending) expires = basem + (u64)(nextevt - basej) * TICK_NSEC; /* * If we expect to sleep more than a tick, mark the base idle. @@ -1947,6 +1948,7 @@ int timers_prepare_cpu(unsigned int cpu) base = per_cpu_ptr(&timer_bases[b], cpu); base->clk = jiffies; base->next_expiry = base->clk + NEXT_TIMER_MAX_DELTA; + base->timers_pending = false; base->is_idle = false; } return 0; From 4a5c155a5ab372516a1a5ddd29473f8f696feb79 Mon Sep 17 00:00:00 2001 From: Suravee Suthikulpanit Date: Thu, 15 Jul 2021 04:02:22 +0700 Subject: [PATCH 363/714] MAINTAINERS: Add Suravee Suthikulpanit as Reviewer for AMD IOMMU (AMD-Vi) To help review changes related to AMD IOMMU. Signed-off-by: Suravee Suthikulpanit Link: https://lore.kernel.org/r/1626296542-30454-1-git-send-email-suravee.suthikulpanit@amd.com Signed-off-by: Joerg Roedel --- MAINTAINERS | 1 + 1 file changed, 1 insertion(+) diff --git a/MAINTAINERS b/MAINTAINERS index a61f4f3b78a91f..f2b60e9555937c 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -933,6 +933,7 @@ F: drivers/video/fbdev/geode/ AMD IOMMU (AMD-VI) M: Joerg Roedel +R: Suravee Suthikulpanit L: iommu@lists.linux-foundation.org S: Maintained T: git git://git.kernel.org/pub/scm/linux/kernel/git/joro/iommu.git From 32a19de21ae40f0601f48575b610dde4f518ccc6 Mon Sep 17 00:00:00 2001 From: Maxime Ripard Date: Wed, 7 Jul 2021 11:51:10 +0200 Subject: [PATCH 364/714] drm/vc4: hdmi: Drop devm interrupt handler for CEC interrupts The CEC interrupt handlers are registered through the devm_request_threaded_irq function. However, while free_irq is indeed called properly when the device is unbound or bind fails, it's called after unbind or bind is done. In our particular case, it means that on failure it creates a window where our interrupt handler can be called, but we're freeing every resource (CEC adapter, DRM objects, etc.) it might need. In order to address this, let's switch to the non-devm variant to control better when the handler will be unregistered and allow us to make it safe. Fixes: 15b4511a4af6 ("drm/vc4: add HDMI CEC support") Signed-off-by: Maxime Ripard Reviewed-by: Dave Stevenson Link: https://patchwork.freedesktop.org/patch/msgid/20210707095112.1469670-2-maxime@cerno.tech --- drivers/gpu/drm/vc4/vc4_hdmi.c | 49 +++++++++++++++++++++++----------- 1 file changed, 33 insertions(+), 16 deletions(-) diff --git a/drivers/gpu/drm/vc4/vc4_hdmi.c b/drivers/gpu/drm/vc4/vc4_hdmi.c index aab1b36ceb3c70..c2876731ee2dc5 100644 --- a/drivers/gpu/drm/vc4/vc4_hdmi.c +++ b/drivers/gpu/drm/vc4/vc4_hdmi.c @@ -1857,38 +1857,46 @@ static int vc4_hdmi_cec_init(struct vc4_hdmi *vc4_hdmi) vc4_hdmi_cec_update_clk_div(vc4_hdmi); if (vc4_hdmi->variant->external_irq_controller) { - ret = devm_request_threaded_irq(&pdev->dev, - platform_get_irq_byname(pdev, "cec-rx"), - vc4_cec_irq_handler_rx_bare, - vc4_cec_irq_handler_rx_thread, 0, - "vc4 hdmi cec rx", vc4_hdmi); + ret = request_threaded_irq(platform_get_irq_byname(pdev, "cec-rx"), + vc4_cec_irq_handler_rx_bare, + vc4_cec_irq_handler_rx_thread, 0, + "vc4 hdmi cec rx", vc4_hdmi); if (ret) goto err_delete_cec_adap; - ret = devm_request_threaded_irq(&pdev->dev, - platform_get_irq_byname(pdev, "cec-tx"), - vc4_cec_irq_handler_tx_bare, - vc4_cec_irq_handler_tx_thread, 0, - "vc4 hdmi cec tx", vc4_hdmi); + ret = request_threaded_irq(platform_get_irq_byname(pdev, "cec-tx"), + vc4_cec_irq_handler_tx_bare, + vc4_cec_irq_handler_tx_thread, 0, + "vc4 hdmi cec tx", vc4_hdmi); if (ret) - goto err_delete_cec_adap; + goto err_remove_cec_rx_handler; } else { HDMI_WRITE(HDMI_CEC_CPU_MASK_SET, 0xffffffff); - ret = devm_request_threaded_irq(&pdev->dev, platform_get_irq(pdev, 0), - vc4_cec_irq_handler, - vc4_cec_irq_handler_thread, 0, - "vc4 hdmi cec", vc4_hdmi); + ret = request_threaded_irq(platform_get_irq(pdev, 0), + vc4_cec_irq_handler, + vc4_cec_irq_handler_thread, 0, + "vc4 hdmi cec", vc4_hdmi); if (ret) goto err_delete_cec_adap; } ret = cec_register_adapter(vc4_hdmi->cec_adap, &pdev->dev); if (ret < 0) - goto err_delete_cec_adap; + goto err_remove_handlers; return 0; +err_remove_handlers: + if (vc4_hdmi->variant->external_irq_controller) + free_irq(platform_get_irq_byname(pdev, "cec-tx"), vc4_hdmi); + else + free_irq(platform_get_irq(pdev, 0), vc4_hdmi); + +err_remove_cec_rx_handler: + if (vc4_hdmi->variant->external_irq_controller) + free_irq(platform_get_irq_byname(pdev, "cec-rx"), vc4_hdmi); + err_delete_cec_adap: cec_delete_adapter(vc4_hdmi->cec_adap); @@ -1897,6 +1905,15 @@ static int vc4_hdmi_cec_init(struct vc4_hdmi *vc4_hdmi) static void vc4_hdmi_cec_exit(struct vc4_hdmi *vc4_hdmi) { + struct platform_device *pdev = vc4_hdmi->pdev; + + if (vc4_hdmi->variant->external_irq_controller) { + free_irq(platform_get_irq_byname(pdev, "cec-rx"), vc4_hdmi); + free_irq(platform_get_irq_byname(pdev, "cec-tx"), vc4_hdmi); + } else { + free_irq(platform_get_irq(pdev, 0), vc4_hdmi); + } + cec_unregister_adapter(vc4_hdmi->cec_adap); } #else From f8c2602733c953ed7a16e060640b8e96f9d94b9b Mon Sep 17 00:00:00 2001 From: Vasily Gorbik Date: Fri, 25 Jun 2021 23:50:07 +0200 Subject: [PATCH 365/714] s390/ftrace: fix ftrace_update_ftrace_func implementation s390 enforces DYNAMIC_FTRACE if FUNCTION_TRACER is selected. At the same time implementation of ftrace_caller is not compliant with HAVE_DYNAMIC_FTRACE since it doesn't provide implementation of ftrace_update_ftrace_func() and calls ftrace_trace_function() directly. The subtle difference is that during ftrace code patching ftrace replaces function tracer via ftrace_update_ftrace_func() and activates it back afterwards. Unexpected direct calls to ftrace_trace_function() during ftrace code patching leads to nullptr-dereferences when tracing is activated for one of functions which are used during code patching. Those function currently are: copy_from_kernel_nofault() copy_from_kernel_nofault_allowed() preempt_count_sub() [with debug_defconfig] preempt_count_add() [with debug_defconfig] Corresponding KASAN report: BUG: KASAN: nullptr-dereference in function_trace_call+0x316/0x3b0 Read of size 4 at addr 0000000000001e08 by task migration/0/15 CPU: 0 PID: 15 Comm: migration/0 Tainted: G B 5.13.0-41423-g08316af3644d Hardware name: IBM 3906 M04 704 (LPAR) Stopper: multi_cpu_stop+0x0/0x3e0 <- stop_machine_cpuslocked+0x1e4/0x218 Call Trace: [<0000000001f77caa>] show_stack+0x16a/0x1d0 [<0000000001f8de42>] dump_stack+0x15a/0x1b0 [<0000000001f81d56>] print_address_description.constprop.0+0x66/0x2e0 [<000000000082b0ca>] kasan_report+0x152/0x1c0 [<00000000004cfd8e>] function_trace_call+0x316/0x3b0 [<0000000001fb7082>] ftrace_caller+0x7a/0x7e [<00000000006bb3e6>] copy_from_kernel_nofault_allowed+0x6/0x10 [<00000000006bb42e>] copy_from_kernel_nofault+0x3e/0xd0 [<000000000014605c>] ftrace_make_call+0xb4/0x1f8 [<000000000047a1b4>] ftrace_replace_code+0x134/0x1d8 [<000000000047a6e0>] ftrace_modify_all_code+0x120/0x1d0 [<000000000047a7ec>] __ftrace_modify_code+0x5c/0x78 [<000000000042395c>] multi_cpu_stop+0x224/0x3e0 [<0000000000423212>] cpu_stopper_thread+0x33a/0x5a0 [<0000000000243ff2>] smpboot_thread_fn+0x302/0x708 [<00000000002329ea>] kthread+0x342/0x408 [<00000000001066b2>] __ret_from_fork+0x92/0xf0 [<0000000001fb57fa>] ret_from_fork+0xa/0x30 The buggy address belongs to the page: page:(____ptrval____) refcount:1 mapcount:0 mapping:0000000000000000 index:0x0 pfn:0x1 flags: 0x1ffff00000001000(reserved|node=0|zone=0|lastcpupid=0x1ffff) raw: 1ffff00000001000 0000040000000048 0000040000000048 0000000000000000 raw: 0000000000000000 0000000000000000 ffffffff00000001 0000000000000000 page dumped because: kasan: bad access detected Memory state around the buggy address: 0000000000001d00: f7 f7 f7 f7 f7 f7 f7 f7 f7 f7 f7 f7 f7 f7 f7 f7 0000000000001d80: f7 f7 f7 f7 f7 f7 f7 f7 f7 f7 f7 f7 f7 f7 f7 f7 >0000000000001e00: f7 f7 f7 f7 f7 f7 f7 f7 f7 f7 f7 f7 f7 f7 f7 f7 ^ 0000000000001e80: f7 f7 f7 f7 f7 f7 f7 f7 f7 f7 f7 f7 f7 f7 f7 f7 0000000000001f00: f7 f7 f7 f7 f7 f7 f7 f7 f7 f7 f7 f7 f7 f7 f7 f7 ================================================================== To fix that introduce ftrace_func callback to be called from ftrace_caller and update it in ftrace_update_ftrace_func(). Fixes: 4cc9bed034d1 ("[S390] cleanup ftrace backend functions") Cc: stable@vger.kernel.org Reviewed-by: Heiko Carstens Signed-off-by: Vasily Gorbik Signed-off-by: Heiko Carstens --- arch/s390/include/asm/ftrace.h | 1 + arch/s390/kernel/ftrace.c | 2 ++ arch/s390/kernel/mcount.S | 4 ++-- 3 files changed, 5 insertions(+), 2 deletions(-) diff --git a/arch/s390/include/asm/ftrace.h b/arch/s390/include/asm/ftrace.h index 695c61989f97c7..345cbe982a8bfa 100644 --- a/arch/s390/include/asm/ftrace.h +++ b/arch/s390/include/asm/ftrace.h @@ -19,6 +19,7 @@ void ftrace_caller(void); extern char ftrace_graph_caller_end; extern unsigned long ftrace_plt; +extern void *ftrace_func; struct dyn_arch_ftrace { }; diff --git a/arch/s390/kernel/ftrace.c b/arch/s390/kernel/ftrace.c index c6ddeb5029b496..2d8f595d919616 100644 --- a/arch/s390/kernel/ftrace.c +++ b/arch/s390/kernel/ftrace.c @@ -40,6 +40,7 @@ * trampoline (ftrace_plt), which clobbers also r1. */ +void *ftrace_func __read_mostly = ftrace_stub; unsigned long ftrace_plt; int ftrace_modify_call(struct dyn_ftrace *rec, unsigned long old_addr, @@ -85,6 +86,7 @@ int ftrace_make_call(struct dyn_ftrace *rec, unsigned long addr) int ftrace_update_ftrace_func(ftrace_func_t func) { + ftrace_func = func; return 0; } diff --git a/arch/s390/kernel/mcount.S b/arch/s390/kernel/mcount.S index faf64c2f90f52e..6b13797143a72a 100644 --- a/arch/s390/kernel/mcount.S +++ b/arch/s390/kernel/mcount.S @@ -59,13 +59,13 @@ ENTRY(ftrace_caller) #ifdef CONFIG_HAVE_MARCH_Z196_FEATURES aghik %r2,%r0,-MCOUNT_INSN_SIZE lgrl %r4,function_trace_op - lgrl %r1,ftrace_trace_function + lgrl %r1,ftrace_func #else lgr %r2,%r0 aghi %r2,-MCOUNT_INSN_SIZE larl %r4,function_trace_op lg %r4,0(%r4) - larl %r1,ftrace_trace_function + larl %r1,ftrace_func lg %r1,0(%r1) #endif lgr %r3,%r14 From e44fbdb68049539de9923ce4bad2d277aef54892 Mon Sep 17 00:00:00 2001 From: Nicholas Piggin Date: Mon, 12 Jul 2021 11:36:50 +1000 Subject: [PATCH 366/714] KVM: PPC: Book3S HV P9: Fix guest TM support The conversion to C introduced several bugs in TM handling that can cause host crashes with TM bad thing interrupts. Mostly just simple typos or missed logic in the conversion that got through due to my not testing TM in the guest sufficiently. - Early TM emulation for the softpatch interrupt should be done if fake suspend mode is _not_ active. - Early TM emulation wants to return immediately to the guest so as to not doom transactions unnecessarily. - And if exiting from the guest, the host MSR should include the TM[S] bit if the guest was T/S, before it is treclaimed. After this fix, all the TM selftests pass when running on a P9 processor that implements TM with softpatch interrupt. Fixes: 89d35b2391015 ("KVM: PPC: Book3S HV P9: Implement the rest of the P9 path in C") Reported-by: Alexey Kardashevskiy Signed-off-by: Nicholas Piggin Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20210712013650.376325-1-npiggin@gmail.com --- arch/powerpc/kvm/book3s_hv_p9_entry.c | 25 ++++++++++++++++++++++--- 1 file changed, 22 insertions(+), 3 deletions(-) diff --git a/arch/powerpc/kvm/book3s_hv_p9_entry.c b/arch/powerpc/kvm/book3s_hv_p9_entry.c index 83f592eadcd2d8..961b3d70483ca1 100644 --- a/arch/powerpc/kvm/book3s_hv_p9_entry.c +++ b/arch/powerpc/kvm/book3s_hv_p9_entry.c @@ -317,6 +317,9 @@ int kvmhv_vcpu_entry_p9(struct kvm_vcpu *vcpu, u64 time_limit, unsigned long lpc */ mtspr(SPRN_HDEC, hdec); +#ifdef CONFIG_PPC_TRANSACTIONAL_MEM +tm_return_to_guest: +#endif mtspr(SPRN_DAR, vcpu->arch.shregs.dar); mtspr(SPRN_DSISR, vcpu->arch.shregs.dsisr); mtspr(SPRN_SRR0, vcpu->arch.shregs.srr0); @@ -415,11 +418,23 @@ int kvmhv_vcpu_entry_p9(struct kvm_vcpu *vcpu, u64 time_limit, unsigned long lpc * is in real suspend mode and is trying to transition to * transactional mode. */ - if (local_paca->kvm_hstate.fake_suspend && + if (!local_paca->kvm_hstate.fake_suspend && (vcpu->arch.shregs.msr & MSR_TS_S)) { if (kvmhv_p9_tm_emulation_early(vcpu)) { - /* Prevent it being handled again. */ - trap = 0; + /* + * Go straight back into the guest with the + * new NIP/MSR as set by TM emulation. + */ + mtspr(SPRN_HSRR0, vcpu->arch.regs.nip); + mtspr(SPRN_HSRR1, vcpu->arch.shregs.msr); + + /* + * tm_return_to_guest re-loads SRR0/1, DAR, + * DSISR after RI is cleared, in case they had + * been clobbered by a MCE. + */ + __mtmsrd(0, 1); /* clear RI */ + goto tm_return_to_guest; } } #endif @@ -499,6 +514,10 @@ int kvmhv_vcpu_entry_p9(struct kvm_vcpu *vcpu, u64 time_limit, unsigned long lpc * If we are in real mode, only switch MMU on after the MMU is * switched to host, to avoid the P9_RADIX_PREFETCH_BUG. */ + if (IS_ENABLED(CONFIG_PPC_TRANSACTIONAL_MEM) && + vcpu->arch.shregs.msr & MSR_TS_MASK) + msr |= MSR_TS_S; + __mtmsrd(msr, 0); end_timing(vcpu); From 21de80b53b3727c33d0771c5a28114be5803d157 Mon Sep 17 00:00:00 2001 From: Nishanth Menon Date: Fri, 2 Jul 2021 20:29:31 -0500 Subject: [PATCH 367/714] LICENSES/dual/CC-BY-4.0: Git rid of "smart quotes" A couple of exotic quote characters came in with this license text; they can confuse software that is not expecting non-ASCII text. Switch to normal quotes here, with no changes to the actual license text. Reported-by: Rahul T R Signed-off-by: Nishanth Menon CC: Greg Kroah-Hartman Acked-by: Thomas Gleixner Acked-by: Thorsten Leemhuis Acked-by: Randy Dunlap Link: https://lore.kernel.org/r/20210703012931.30604-1-nm@ti.com Signed-off-by: Jonathan Corbet --- LICENSES/dual/CC-BY-4.0 | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/LICENSES/dual/CC-BY-4.0 b/LICENSES/dual/CC-BY-4.0 index 45a81b8e46697f..869cad3d164382 100644 --- a/LICENSES/dual/CC-BY-4.0 +++ b/LICENSES/dual/CC-BY-4.0 @@ -392,7 +392,7 @@ Section 8 -- Interpretation. Creative Commons is not a party to its public licenses. Notwithstanding, Creative Commons may elect to apply one of its public licenses to material it publishes and in those instances -will be considered the “Licensor.” The text of the Creative Commons +will be considered the "Licensor." The text of the Creative Commons public licenses is dedicated to the public domain under the CC0 Public Domain Dedication. Except for the limited purpose of indicating that material is shared under a Creative Commons public license or as From 842f697776926b06ada93b16658cdd77cc0ef710 Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Thu, 1 Jul 2021 13:31:52 +0200 Subject: [PATCH 368/714] Documentation/features: Update the ARCH_HAS_TICK_BROADCAST entry Risc-V gained support recently. Signed-off-by: Ingo Molnar Link: https://lore.kernel.org/r/YN2nqOVHgGDt4Iid@gmail.com Signed-off-by: Jonathan Corbet --- .../features/time/arch-tick-broadcast/arch-support.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Documentation/features/time/arch-tick-broadcast/arch-support.txt b/Documentation/features/time/arch-tick-broadcast/arch-support.txt index 8639fe8315f50e..8dcaab070c7b06 100644 --- a/Documentation/features/time/arch-tick-broadcast/arch-support.txt +++ b/Documentation/features/time/arch-tick-broadcast/arch-support.txt @@ -22,7 +22,7 @@ | openrisc: | TODO | | parisc: | TODO | | powerpc: | ok | - | riscv: | TODO | + | riscv: | ok | | s390: | TODO | | sh: | ok | | sparc: | TODO | From d3fb38266afefc6424d7179c14936c5908d5e2f2 Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Thu, 1 Jul 2021 13:31:17 +0200 Subject: [PATCH 369/714] Documentation/features: Add THREAD_INFO_IN_TASK feature matrix Signed-off-by: Ingo Molnar Link: https://lore.kernel.org/r/YN2nhV5F0hBVNPuX@gmail.com Signed-off-by: Jonathan Corbet --- .../core/thread-info-in-task/arch-support.txt | 32 +++++++++++++++++++ 1 file changed, 32 insertions(+) create mode 100644 Documentation/features/core/thread-info-in-task/arch-support.txt diff --git a/Documentation/features/core/thread-info-in-task/arch-support.txt b/Documentation/features/core/thread-info-in-task/arch-support.txt new file mode 100644 index 00000000000000..9f0259bbd7dfaa --- /dev/null +++ b/Documentation/features/core/thread-info-in-task/arch-support.txt @@ -0,0 +1,32 @@ +# +# Feature name: thread-info-in-task +# Kconfig: THREAD_INFO_IN_TASK +# description: arch makes use of the core kernel facility to embedd thread_info in task_struct +# + ----------------------- + | arch |status| + ----------------------- + | alpha: | TODO | + | arc: | TODO | + | arm: | TODO | + | arm64: | ok | + | csky: | TODO | + | h8300: | TODO | + | hexagon: | TODO | + | ia64: | TODO | + | m68k: | TODO | + | microblaze: | TODO | + | mips: | TODO | + | nds32: | ok | + | nios2: | TODO | + | openrisc: | TODO | + | parisc: | TODO | + | powerpc: | ok | + | riscv: | ok | + | s390: | ok | + | sh: | TODO | + | sparc: | TODO | + | um: | TODO | + | x86: | ok | + | xtensa: | TODO | + ----------------------- From 530c4374e21ae750c5fa5aa67b36a97635ddb379 Mon Sep 17 00:00:00 2001 From: Hu Haowen Date: Sun, 20 Jun 2021 09:04:44 +0800 Subject: [PATCH 370/714] docs/zh_CN: add a missing space character "LinusTorvalds" is not pretty. Replace it with "Linus Torvalds". Signed-off-by: Hu Haowen Link: https://lore.kernel.org/r/20210620010444.24813-1-src.res@email.cn Signed-off-by: Jonathan Corbet --- Documentation/translations/zh_CN/process/2.Process.rst | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/Documentation/translations/zh_CN/process/2.Process.rst b/Documentation/translations/zh_CN/process/2.Process.rst index 229629e305ca9b..4a6ed021949437 100644 --- a/Documentation/translations/zh_CN/process/2.Process.rst +++ b/Documentation/translations/zh_CN/process/2.Process.rst @@ -47,7 +47,7 @@ (顺便说一句,值得注意的是,合并窗口期间集成的更改并不是凭空产生的;它们是经 提前收集、测试和分级的。稍后将详细描述该过程的工作方式。) -合并窗口持续大约两周。在这段时间结束时,LinusTorvalds将声明窗口已关闭,并 +合并窗口持续大约两周。在这段时间结束时,Linus Torvalds将声明窗口已关闭,并 释放第一个“rc”内核。例如,对于目标为5.6的内核,在合并窗口结束时发生的释放 将被称为5.6-rc1。-rc1 版本是一个信号,表示合并新特性的时间已经过去,稳定下一 个内核的时间已经到来。 @@ -168,7 +168,7 @@ Greg Kroah-Hartman领导。稳定团队将使用5.x.y编号方案不定期地发 补丁如何进入内核 ---------------- -只有一个人可以将补丁合并到主线内核存储库中:LinusTorvalds。但是,在进入 +只有一个人可以将补丁合并到主线内核存储库中:Linus Torvalds。但是,在进入 2.6.38内核的9500多个补丁中,只有112个(大约1.3%)是由Linus自己直接选择的。 内核项目已经发展到一个没有一个开发人员可以在没有支持的情况下检查和选择每个 补丁的规模。内核开发人员处理这种增长的方式是使用围绕信任链构建的助理系统。 From d549c66766ee42175e2f4a47219b4103ddab4ffa Mon Sep 17 00:00:00 2001 From: Thierry Reding Date: Tue, 22 Jun 2021 13:33:27 +0200 Subject: [PATCH 371/714] dt-bindings: net: dsa: sja1105: Fix indentation warnings Some of the lines aren't properly indented, causing yamllint to warn about them: .../nxp,sja1105.yaml:70:17: [warning] wrong indentation: expected 18 but found 16 (indentation) Use the proper indentation to fix those warnings. Signed-off-by: Thierry Reding Fixes: 070f5b701d559ae1 ("dt-bindings: net: dsa: sja1105: add SJA1110 bindings") Tested-by: Geert Uytterhoeven Link: https://lore.kernel.org/r/20210622113327.3613595-1-thierry.reding@gmail.com Signed-off-by: Rob Herring --- Documentation/devicetree/bindings/net/dsa/nxp,sja1105.yaml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/Documentation/devicetree/bindings/net/dsa/nxp,sja1105.yaml b/Documentation/devicetree/bindings/net/dsa/nxp,sja1105.yaml index 0b8a05dd52e60c..f978f8719d8e87 100644 --- a/Documentation/devicetree/bindings/net/dsa/nxp,sja1105.yaml +++ b/Documentation/devicetree/bindings/net/dsa/nxp,sja1105.yaml @@ -67,8 +67,8 @@ properties: reg: oneOf: - enum: - - 0 - - 1 + - 0 + - 1 required: - compatible From 004d62eb4e57db3c391ed0df007cc11c93b6fbeb Mon Sep 17 00:00:00 2001 From: Pavel Skripkin Date: Thu, 1 Jul 2021 22:55:00 +0300 Subject: [PATCH 372/714] kvm: debugfs: fix memory leak in kvm_create_vm_debugfs In commit bc9e9e672df9 ("KVM: debugfs: Reuse binary stats descriptors") loop for filling debugfs_stat_data was copy-pasted 2 times, but in the second loop pointers are saved over pointers allocated in the first loop. All this causes is a memory leak, fix it. Fixes: bc9e9e672df9 ("KVM: debugfs: Reuse binary stats descriptors") Signed-off-by: Pavel Skripkin Reviewed-by: Jing Zhang Message-Id: <20210701195500.27097-1-paskripkin@gmail.com> Reviewed-by: Jing Zhang Signed-off-by: Paolo Bonzini --- virt/kvm/kvm_main.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c index f7445c3bcd90a2..6980dabe9df564 100644 --- a/virt/kvm/kvm_main.c +++ b/virt/kvm/kvm_main.c @@ -935,7 +935,7 @@ static int kvm_create_vm_debugfs(struct kvm *kvm, int fd) stat_data->kvm = kvm; stat_data->desc = pdesc; stat_data->kind = KVM_STAT_VCPU; - kvm->debugfs_stat_data[i] = stat_data; + kvm->debugfs_stat_data[i + kvm_vm_stats_header.num_desc] = stat_data; debugfs_create_file(pdesc->name, kvm_stats_debugfs_mode(pdesc), kvm->debugfs_dentry, stat_data, &stat_fops_per_vm); From 6f2f86ec28fb68cf6a342767a35f7b13703aa96f Mon Sep 17 00:00:00 2001 From: Ricardo Koller Date: Fri, 2 Jul 2021 13:10:42 -0700 Subject: [PATCH 373/714] KVM: selftests: Address extra memslot parameters in vm_vaddr_alloc Commit a75a895e6457 ("KVM: selftests: Unconditionally use memslot 0 for vaddr allocations") removed the memslot parameters from vm_vaddr_alloc. It addressed all callers except one under lib/aarch64/, due to a race with commit e3db7579ef35 ("KVM: selftests: Add exception handling support for aarch64") Fix the vm_vaddr_alloc call in lib/aarch64/processor.c. Reported-by: Zenghui Yu Signed-off-by: Ricardo Koller Message-Id: <20210702201042.4036162-1-ricarkol@google.com> Reviewed-by: Eric Auger Signed-off-by: Paolo Bonzini --- tools/testing/selftests/kvm/lib/aarch64/processor.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tools/testing/selftests/kvm/lib/aarch64/processor.c b/tools/testing/selftests/kvm/lib/aarch64/processor.c index 9f49f6caafe5d6..632b74d6b3cac3 100644 --- a/tools/testing/selftests/kvm/lib/aarch64/processor.c +++ b/tools/testing/selftests/kvm/lib/aarch64/processor.c @@ -401,7 +401,7 @@ void route_exception(struct ex_regs *regs, int vector) void vm_init_descriptor_tables(struct kvm_vm *vm) { vm->handlers = vm_vaddr_alloc(vm, sizeof(struct handlers), - vm->page_size, 0, 0); + vm->page_size); *(vm_vaddr_t *)addr_gva2hva(vm, (vm_vaddr_t)(&exception_handlers)) = vm->handlers; } From f85d40160691881a17a397c448d799dfc90987ba Mon Sep 17 00:00:00 2001 From: Lai Jiangshan Date: Tue, 29 Jun 2021 01:26:32 +0800 Subject: [PATCH 374/714] KVM: X86: Disable hardware breakpoints unconditionally before kvm_x86->run() When the host is using debug registers but the guest is not using them nor is the guest in guest-debug state, the kvm code does not reset the host debug registers before kvm_x86->run(). Rather, it relies on the hardware vmentry instruction to automatically reset the dr7 registers which ensures that the host breakpoints do not affect the guest. This however violates the non-instrumentable nature around VM entry and exit; for example, when a host breakpoint is set on vcpu->arch.cr2, Another issue is consistency. When the guest debug registers are active, the host breakpoints are reset before kvm_x86->run(). But when the guest debug registers are inactive, the host breakpoints are delayed to be disabled. The host tracing tools may see different results depending on what the guest is doing. To fix the problems, we clear %db7 unconditionally before kvm_x86->run() if the host has set any breakpoints, no matter if the guest is using them or not. Signed-off-by: Lai Jiangshan Message-Id: <20210628172632.81029-1-jiangshanlai@gmail.com> Cc: stable@vger.kernel.org [Only clear %db7 instead of reloading all debug registers. - Paolo] Signed-off-by: Paolo Bonzini --- arch/x86/kvm/x86.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index cf10d196ca65ec..c471b1375f36d1 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -9597,6 +9597,8 @@ static int vcpu_enter_guest(struct kvm_vcpu *vcpu) set_debugreg(vcpu->arch.eff_db[3], 3); set_debugreg(vcpu->arch.dr6, 6); vcpu->arch.switch_db_regs &= ~KVM_DEBUGREG_RELOAD; + } else if (unlikely(hw_breakpoint_active())) { + set_debugreg(0, 7); } for (;;) { From c0e1303ed4cc9e7ce39f106b471ad92ca559e3d3 Mon Sep 17 00:00:00 2001 From: Yu Zhang Date: Thu, 8 Jul 2021 07:57:02 +0800 Subject: [PATCH 375/714] KVM: VMX: Remove vmx_msr_index from vmx.h vmx_msr_index was used to record the list of MSRs which can be lazily restored when kvm returns to userspace. It is now reimplemented as kvm_uret_msrs_list, a common x86 list which is only used inside x86.c. So just remove the obsolete declaration in vmx.h. Signed-off-by: Yu Zhang Message-Id: <20210707235702.31595-1-yu.c.zhang@linux.intel.com> Signed-off-by: Paolo Bonzini --- arch/x86/kvm/vmx/vmx.h | 2 -- 1 file changed, 2 deletions(-) diff --git a/arch/x86/kvm/vmx/vmx.h b/arch/x86/kvm/vmx/vmx.h index 3979a947933afb..db88ed4f2121c1 100644 --- a/arch/x86/kvm/vmx/vmx.h +++ b/arch/x86/kvm/vmx/vmx.h @@ -14,8 +14,6 @@ #include "vmx_ops.h" #include "cpuid.h" -extern const u32 vmx_msr_index[]; - #define MSR_TYPE_R 1 #define MSR_TYPE_W 2 #define MSR_TYPE_RW 3 From 991afbbee8ac93b055a27477278a5fb556af1ff4 Mon Sep 17 00:00:00 2001 From: Maxim Levitsky Date: Wed, 7 Jul 2021 15:50:58 +0300 Subject: [PATCH 376/714] KVM: SVM: #SMI interception must not skip the instruction Commit 5ff3a351f687 ("KVM: x86: Move trivial instruction-based exit handlers to common code"), unfortunately made a mistake of treating nop_on_interception and nop_interception in the same way. Former does truly nothing while the latter skips the instruction. SMI VM exit handler should do nothing. (SMI itself is handled by the host when we do STGI) Fixes: 5ff3a351f687 ("KVM: x86: Move trivial instruction-based exit handlers to common code") Signed-off-by: Maxim Levitsky Message-Id: <20210707125100.677203-2-mlevitsk@redhat.com> Cc: stable@vger.kernel.org Signed-off-by: Paolo Bonzini --- arch/x86/kvm/svm/svm.c | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/arch/x86/kvm/svm/svm.c b/arch/x86/kvm/svm/svm.c index ca5614a48b2129..80790faf6370d2 100644 --- a/arch/x86/kvm/svm/svm.c +++ b/arch/x86/kvm/svm/svm.c @@ -2106,6 +2106,11 @@ static int nmi_interception(struct kvm_vcpu *vcpu) return 1; } +static int smi_interception(struct kvm_vcpu *vcpu) +{ + return 1; +} + static int intr_interception(struct kvm_vcpu *vcpu) { ++vcpu->stat.irq_exits; @@ -3080,7 +3085,7 @@ static int (*const svm_exit_handlers[])(struct kvm_vcpu *vcpu) = { [SVM_EXIT_EXCP_BASE + GP_VECTOR] = gp_interception, [SVM_EXIT_INTR] = intr_interception, [SVM_EXIT_NMI] = nmi_interception, - [SVM_EXIT_SMI] = kvm_emulate_as_nop, + [SVM_EXIT_SMI] = smi_interception, [SVM_EXIT_INIT] = kvm_emulate_as_nop, [SVM_EXIT_VINTR] = interrupt_window_interception, [SVM_EXIT_RDPMC] = kvm_emulate_rdpmc, From 896707c212d440a6863ce0a3930c8a609e24497d Mon Sep 17 00:00:00 2001 From: Maxim Levitsky Date: Wed, 7 Jul 2021 15:50:59 +0300 Subject: [PATCH 377/714] KVM: SVM: remove INIT intercept handler Kernel never sends real INIT even to CPUs, other than on boot. Thus INIT interception is an error which should be caught by a check for an unknown VMexit reason. On top of that, the current INIT VM exit handler skips the current instruction which is wrong. That was added in commit 5ff3a351f687 ("KVM: x86: Move trivial instruction-based exit handlers to common code"). Fixes: 5ff3a351f687 ("KVM: x86: Move trivial instruction-based exit handlers to common code") Signed-off-by: Maxim Levitsky Message-Id: <20210707125100.677203-3-mlevitsk@redhat.com> Cc: stable@vger.kernel.org Signed-off-by: Paolo Bonzini --- arch/x86/kvm/svm/svm.c | 1 - 1 file changed, 1 deletion(-) diff --git a/arch/x86/kvm/svm/svm.c b/arch/x86/kvm/svm/svm.c index 80790faf6370d2..18aa4d45cc64a4 100644 --- a/arch/x86/kvm/svm/svm.c +++ b/arch/x86/kvm/svm/svm.c @@ -3086,7 +3086,6 @@ static int (*const svm_exit_handlers[])(struct kvm_vcpu *vcpu) = { [SVM_EXIT_INTR] = intr_interception, [SVM_EXIT_NMI] = nmi_interception, [SVM_EXIT_SMI] = smi_interception, - [SVM_EXIT_INIT] = kvm_emulate_as_nop, [SVM_EXIT_VINTR] = interrupt_window_interception, [SVM_EXIT_RDPMC] = kvm_emulate_rdpmc, [SVM_EXIT_CPUID] = kvm_emulate_cpuid, From 4b639a9f82fcf15497d1613a29aa1df798a24029 Mon Sep 17 00:00:00 2001 From: Maxim Levitsky Date: Wed, 7 Jul 2021 15:51:00 +0300 Subject: [PATCH 378/714] KVM: SVM: add module param to control the #SMI interception In theory there are no side effects of not intercepting #SMI, because then #SMI becomes transparent to the OS and the KVM. Plus an observation on recent Zen2 CPUs reveals that these CPUs ignore #SMI interception and never deliver #SMI VMexits. This is also useful to test nested KVM to see that L1 handles #SMIs correctly in case when L1 doesn't intercept #SMI. Finally the default remains the same, the SMI are intercepted by default thus this patch doesn't have any effect unless non default module param value is used. Signed-off-by: Maxim Levitsky Message-Id: <20210707125100.677203-4-mlevitsk@redhat.com> Signed-off-by: Paolo Bonzini --- arch/x86/kvm/svm/nested.c | 4 ++++ arch/x86/kvm/svm/svm.c | 10 +++++++++- arch/x86/kvm/svm/svm.h | 1 + 3 files changed, 14 insertions(+), 1 deletion(-) diff --git a/arch/x86/kvm/svm/nested.c b/arch/x86/kvm/svm/nested.c index 21d03e3a5dfd52..2884c54a72bb75 100644 --- a/arch/x86/kvm/svm/nested.c +++ b/arch/x86/kvm/svm/nested.c @@ -154,6 +154,10 @@ void recalc_intercepts(struct vcpu_svm *svm) for (i = 0; i < MAX_INTERCEPT; i++) c->intercepts[i] |= g->intercepts[i]; + + /* If SMI is not intercepted, ignore guest SMI intercept as well */ + if (!intercept_smi) + vmcb_clr_intercept(c, INTERCEPT_SMI); } static void copy_vmcb_control_area(struct vmcb_control_area *dst, diff --git a/arch/x86/kvm/svm/svm.c b/arch/x86/kvm/svm/svm.c index 18aa4d45cc64a4..63488d3beb5b49 100644 --- a/arch/x86/kvm/svm/svm.c +++ b/arch/x86/kvm/svm/svm.c @@ -198,6 +198,11 @@ module_param(avic, bool, 0444); bool __read_mostly dump_invalid_vmcb; module_param(dump_invalid_vmcb, bool, 0644); + +bool intercept_smi = true; +module_param(intercept_smi, bool, 0444); + + static bool svm_gp_erratum_intercept = true; static u8 rsm_ins_bytes[] = "\x0f\xaa"; @@ -1185,7 +1190,10 @@ static void init_vmcb(struct kvm_vcpu *vcpu) svm_set_intercept(svm, INTERCEPT_INTR); svm_set_intercept(svm, INTERCEPT_NMI); - svm_set_intercept(svm, INTERCEPT_SMI); + + if (intercept_smi) + svm_set_intercept(svm, INTERCEPT_SMI); + svm_set_intercept(svm, INTERCEPT_SELECTIVE_CR0); svm_set_intercept(svm, INTERCEPT_RDPMC); svm_set_intercept(svm, INTERCEPT_CPUID); diff --git a/arch/x86/kvm/svm/svm.h b/arch/x86/kvm/svm/svm.h index f89b623bb591e8..8cb3bd59c5eae7 100644 --- a/arch/x86/kvm/svm/svm.h +++ b/arch/x86/kvm/svm/svm.h @@ -31,6 +31,7 @@ #define MSRPM_OFFSETS 16 extern u32 msrpm_offsets[MSRPM_OFFSETS] __read_mostly; extern bool npt_enabled; +extern bool intercept_smi; /* * Clean bits in VMCB. From b4a693924aab93f3747465b2261add46c82c3220 Mon Sep 17 00:00:00 2001 From: Sean Christopherson Date: Thu, 6 May 2021 10:58:25 -0700 Subject: [PATCH 379/714] KVM: SVM: Return -EFAULT if copy_to_user() for SEV mig packet header fails Return -EFAULT if copy_to_user() fails; if accessing user memory faults, copy_to_user() returns the number of bytes remaining, not an error code. Reported-by: Dan Carpenter Cc: Steve Rutherford Cc: Brijesh Singh Cc: Ashish Kalra Fixes: d3d1af85e2c7 ("KVM: SVM: Add KVM_SEND_UPDATE_DATA command") Signed-off-by: Sean Christopherson Message-Id: <20210506175826.2166383-2-seanjc@google.com> Signed-off-by: Paolo Bonzini --- arch/x86/kvm/svm/sev.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/arch/x86/kvm/svm/sev.c b/arch/x86/kvm/svm/sev.c index 8d36f0c7307187..3dc3e2897804e8 100644 --- a/arch/x86/kvm/svm/sev.c +++ b/arch/x86/kvm/svm/sev.c @@ -1309,8 +1309,9 @@ static int sev_send_update_data(struct kvm *kvm, struct kvm_sev_cmd *argp) } /* Copy packet header to userspace. */ - ret = copy_to_user((void __user *)(uintptr_t)params.hdr_uaddr, hdr, - params.hdr_len); + if (copy_to_user((void __user *)(uintptr_t)params.hdr_uaddr, hdr, + params.hdr_len)) + ret = -EFAULT; e_free_trans_data: kfree(trans_data); From c7a1b2b678c54ac19320daf525038d0e2e43ca7c Mon Sep 17 00:00:00 2001 From: Sean Christopherson Date: Thu, 6 May 2021 10:58:26 -0700 Subject: [PATCH 380/714] KVM: SVM: Fix sev_pin_memory() error checks in SEV migration utilities Use IS_ERR() instead of checking for a NULL pointer when querying for sev_pin_memory() failures. sev_pin_memory() always returns an error code cast to a pointer, or a valid pointer; it never returns NULL. Reported-by: Dan Carpenter Cc: Steve Rutherford Cc: Brijesh Singh Cc: Ashish Kalra Fixes: d3d1af85e2c7 ("KVM: SVM: Add KVM_SEND_UPDATE_DATA command") Fixes: 15fb7de1a7f5 ("KVM: SVM: Add KVM_SEV_RECEIVE_UPDATE_DATA command") Signed-off-by: Sean Christopherson Message-Id: <20210506175826.2166383-3-seanjc@google.com> Signed-off-by: Paolo Bonzini --- arch/x86/kvm/svm/sev.c | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/arch/x86/kvm/svm/sev.c b/arch/x86/kvm/svm/sev.c index 3dc3e2897804e8..02d60d7f903da8 100644 --- a/arch/x86/kvm/svm/sev.c +++ b/arch/x86/kvm/svm/sev.c @@ -1271,8 +1271,8 @@ static int sev_send_update_data(struct kvm *kvm, struct kvm_sev_cmd *argp) /* Pin guest memory */ guest_page = sev_pin_memory(kvm, params.guest_uaddr & PAGE_MASK, PAGE_SIZE, &n, 0); - if (!guest_page) - return -EFAULT; + if (IS_ERR(guest_page)) + return PTR_ERR(guest_page); /* allocate memory for header and transport buffer */ ret = -ENOMEM; @@ -1463,11 +1463,12 @@ static int sev_receive_update_data(struct kvm *kvm, struct kvm_sev_cmd *argp) data.trans_len = params.trans_len; /* Pin guest memory */ - ret = -EFAULT; guest_page = sev_pin_memory(kvm, params.guest_uaddr & PAGE_MASK, PAGE_SIZE, &n, 0); - if (!guest_page) + if (IS_ERR(guest_page)) { + ret = PTR_ERR(guest_page); goto e_free_trans; + } /* The RECEIVE_UPDATE_DATA command requires C-bit to be always set. */ data.guest_address = (page_to_pfn(guest_page[0]) << PAGE_SHIFT) + offset; From fce7e152ffc8f89d02a80617b16c7aa1527847c8 Mon Sep 17 00:00:00 2001 From: Vitaly Kuznetsov Date: Mon, 28 Jun 2021 12:44:20 +0200 Subject: [PATCH 381/714] KVM: nSVM: Check the value written to MSR_VM_HSAVE_PA APM states that #GP is raised upon write to MSR_VM_HSAVE_PA when the supplied address is not page-aligned or is outside of "maximum supported physical address for this implementation". page_address_valid() check seems suitable. Also, forcefully page-align the address when it's written from VMM. Signed-off-by: Vitaly Kuznetsov Message-Id: <20210628104425.391276-2-vkuznets@redhat.com> Cc: stable@vger.kernel.org Reviewed-by: Maxim Levitsky [Add comment about behavior for host-provided values. - Paolo] Signed-off-by: Paolo Bonzini --- arch/x86/kvm/svm/svm.c | 11 ++++++++++- 1 file changed, 10 insertions(+), 1 deletion(-) diff --git a/arch/x86/kvm/svm/svm.c b/arch/x86/kvm/svm/svm.c index 63488d3beb5b49..b6e91de891cb52 100644 --- a/arch/x86/kvm/svm/svm.c +++ b/arch/x86/kvm/svm/svm.c @@ -2954,7 +2954,16 @@ static int svm_set_msr(struct kvm_vcpu *vcpu, struct msr_data *msr) svm_disable_lbrv(vcpu); break; case MSR_VM_HSAVE_PA: - svm->nested.hsave_msr = data; + /* + * Old kernels did not validate the value written to + * MSR_VM_HSAVE_PA. Allow KVM_SET_MSR to set an invalid + * value to allow live migrating buggy or malicious guests + * originating from those kernels. + */ + if (!msr->host_initiated && !page_address_valid(vcpu, data)) + return 1; + + svm->nested.hsave_msr = data & PAGE_MASK; break; case MSR_VM_CR: return svm_set_vm_cr(vcpu, data); From fb79f566e4c99db8647cf0435e3732f12e856ab0 Mon Sep 17 00:00:00 2001 From: Vitaly Kuznetsov Date: Mon, 28 Jun 2021 12:44:21 +0200 Subject: [PATCH 382/714] KVM: nSVM: Check that VM_HSAVE_PA MSR was set before VMRUN APM states that "The address written to the VM_HSAVE_PA MSR, which holds the address of the page used to save the host state on a VMRUN, must point to a hypervisor-owned page. If this check fails, the WRMSR will fail with a #GP(0) exception. Note that a value of 0 is not considered valid for the VM_HSAVE_PA MSR and a VMRUN that is attempted while the HSAVE_PA is 0 will fail with a #GP(0) exception." svm_set_msr() already checks that the supplied address is valid, so only check for '0' is missing. Add it to nested_svm_vmrun(). Signed-off-by: Vitaly Kuznetsov Message-Id: <20210628104425.391276-3-vkuznets@redhat.com> Reviewed-by: Maxim Levitsky Signed-off-by: Paolo Bonzini --- arch/x86/kvm/svm/nested.c | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/arch/x86/kvm/svm/nested.c b/arch/x86/kvm/svm/nested.c index 2884c54a72bb75..ec16a06f9aa8ce 100644 --- a/arch/x86/kvm/svm/nested.c +++ b/arch/x86/kvm/svm/nested.c @@ -622,6 +622,11 @@ int nested_svm_vmrun(struct kvm_vcpu *vcpu) struct kvm_host_map map; u64 vmcb12_gpa; + if (!svm->nested.hsave_msr) { + kvm_inject_gp(vcpu, 0); + return 1; + } + if (is_smm(vcpu)) { kvm_queue_exception(vcpu, UD_VECTOR); return 1; From 0a758290762cf6fb69ad09712ac834cd4f07504f Mon Sep 17 00:00:00 2001 From: Vitaly Kuznetsov Date: Mon, 28 Jun 2021 12:44:22 +0200 Subject: [PATCH 383/714] KVM: nSVM: Introduce svm_copy_vmrun_state() Separate the code setting non-VMLOAD-VMSAVE state from svm_set_nested_state() into its own function. This is going to be re-used from svm_enter_smm()/svm_leave_smm(). Signed-off-by: Vitaly Kuznetsov Message-Id: <20210628104425.391276-4-vkuznets@redhat.com> Signed-off-by: Paolo Bonzini --- arch/x86/kvm/svm/nested.c | 40 +++++++++++++++++++++------------------ arch/x86/kvm/svm/svm.h | 2 ++ 2 files changed, 24 insertions(+), 18 deletions(-) diff --git a/arch/x86/kvm/svm/nested.c b/arch/x86/kvm/svm/nested.c index ec16a06f9aa8ce..c4296fb4b8be87 100644 --- a/arch/x86/kvm/svm/nested.c +++ b/arch/x86/kvm/svm/nested.c @@ -701,6 +701,27 @@ int nested_svm_vmrun(struct kvm_vcpu *vcpu) return ret; } +/* Copy state save area fields which are handled by VMRUN */ +void svm_copy_vmrun_state(struct vmcb_save_area *from_save, + struct vmcb_save_area *to_save) +{ + to_save->es = from_save->es; + to_save->cs = from_save->cs; + to_save->ss = from_save->ss; + to_save->ds = from_save->ds; + to_save->gdtr = from_save->gdtr; + to_save->idtr = from_save->idtr; + to_save->rflags = from_save->rflags | X86_EFLAGS_FIXED; + to_save->efer = from_save->efer; + to_save->cr0 = from_save->cr0; + to_save->cr3 = from_save->cr3; + to_save->cr4 = from_save->cr4; + to_save->rax = from_save->rax; + to_save->rsp = from_save->rsp; + to_save->rip = from_save->rip; + to_save->cpl = 0; +} + void nested_svm_vmloadsave(struct vmcb *from_vmcb, struct vmcb *to_vmcb) { to_vmcb->save.fs = from_vmcb->save.fs; @@ -1364,28 +1385,11 @@ static int svm_set_nested_state(struct kvm_vcpu *vcpu, svm->nested.vmcb12_gpa = kvm_state->hdr.svm.vmcb_pa; - svm->vmcb01.ptr->save.es = save->es; - svm->vmcb01.ptr->save.cs = save->cs; - svm->vmcb01.ptr->save.ss = save->ss; - svm->vmcb01.ptr->save.ds = save->ds; - svm->vmcb01.ptr->save.gdtr = save->gdtr; - svm->vmcb01.ptr->save.idtr = save->idtr; - svm->vmcb01.ptr->save.rflags = save->rflags | X86_EFLAGS_FIXED; - svm->vmcb01.ptr->save.efer = save->efer; - svm->vmcb01.ptr->save.cr0 = save->cr0; - svm->vmcb01.ptr->save.cr3 = save->cr3; - svm->vmcb01.ptr->save.cr4 = save->cr4; - svm->vmcb01.ptr->save.rax = save->rax; - svm->vmcb01.ptr->save.rsp = save->rsp; - svm->vmcb01.ptr->save.rip = save->rip; - svm->vmcb01.ptr->save.cpl = 0; - + svm_copy_vmrun_state(save, &svm->vmcb01.ptr->save); nested_load_control_from_vmcb12(svm, ctl); svm_switch_vmcb(svm, &svm->nested.vmcb02); - nested_vmcb02_prepare_control(svm); - kvm_make_request(KVM_REQ_GET_NESTED_STATE_PAGES, vcpu); ret = 0; out_free: diff --git a/arch/x86/kvm/svm/svm.h b/arch/x86/kvm/svm/svm.h index 8cb3bd59c5eae7..fe87fd68b73bb2 100644 --- a/arch/x86/kvm/svm/svm.h +++ b/arch/x86/kvm/svm/svm.h @@ -464,6 +464,8 @@ void svm_leave_nested(struct vcpu_svm *svm); void svm_free_nested(struct vcpu_svm *svm); int svm_allocate_nested(struct vcpu_svm *svm); int nested_svm_vmrun(struct kvm_vcpu *vcpu); +void svm_copy_vmrun_state(struct vmcb_save_area *from_save, + struct vmcb_save_area *to_save); void nested_svm_vmloadsave(struct vmcb *from_vmcb, struct vmcb *to_vmcb); int nested_svm_vmexit(struct vcpu_svm *svm); From 37be407b2ce807179108eeac788805848fe048f1 Mon Sep 17 00:00:00 2001 From: Vitaly Kuznetsov Date: Mon, 28 Jun 2021 12:44:23 +0200 Subject: [PATCH 384/714] KVM: nSVM: Fix L1 state corruption upon return from SMM VMCB split commit 4995a3685f1b ("KVM: SVM: Use a separate vmcb for the nested L2 guest") broke return from SMM when we entered there from guest (L2) mode. Gen2 WS2016/Hyper-V is known to do this on boot. The problem manifests itself like this: kvm_exit: reason EXIT_RSM rip 0x7ffbb280 info 0 0 kvm_emulate_insn: 0:7ffbb280: 0f aa kvm_smm_transition: vcpu 0: leaving SMM, smbase 0x7ffb3000 kvm_nested_vmrun: rip: 0x000000007ffbb280 vmcb: 0x0000000008224000 nrip: 0xffffffffffbbe119 int_ctl: 0x01020000 event_inj: 0x00000000 npt: on kvm_nested_intercepts: cr_read: 0000 cr_write: 0010 excp: 40060002 intercepts: fd44bfeb 0000217f 00000000 kvm_entry: vcpu 0, rip 0xffffffffffbbe119 kvm_exit: reason EXIT_NPF rip 0xffffffffffbbe119 info 200000006 1ab000 kvm_nested_vmexit: vcpu 0 reason npf rip 0xffffffffffbbe119 info1 0x0000000200000006 info2 0x00000000001ab000 intr_info 0x00000000 error_code 0x00000000 kvm_page_fault: address 1ab000 error_code 6 kvm_nested_vmexit_inject: reason EXIT_NPF info1 200000006 info2 1ab000 int_info 0 int_info_err 0 kvm_entry: vcpu 0, rip 0x7ffbb280 kvm_exit: reason EXIT_EXCP_GP rip 0x7ffbb280 info 0 0 kvm_emulate_insn: 0:7ffbb280: 0f aa kvm_inj_exception: #GP (0x0) Note: return to L2 succeeded but upon first exit to L1 its RIP points to 'RSM' instruction but we're not in SMM. The problem appears to be that VMCB01 gets irreversibly destroyed during SMM execution. Previously, we used to have 'hsave' VMCB where regular (pre-SMM) L1's state was saved upon nested_svm_vmexit() but now we just switch to VMCB01 from VMCB02. Pre-split (working) flow looked like: - SMM is triggered during L2's execution - L2's state is pushed to SMRAM - nested_svm_vmexit() restores L1's state from 'hsave' - SMM -> RSM - enter_svm_guest_mode() switches to L2 but keeps 'hsave' intact so we have pre-SMM (and pre L2 VMRUN) L1's state there - L2's state is restored from SMRAM - upon first exit L1's state is restored from L1. This was always broken with regards to svm_get_nested_state()/ svm_set_nested_state(): 'hsave' was never a part of what's being save and restored so migration happening during SMM triggered from L2 would never restore L1's state correctly. Post-split flow (broken) looks like: - SMM is triggered during L2's execution - L2's state is pushed to SMRAM - nested_svm_vmexit() switches to VMCB01 from VMCB02 - SMM -> RSM - enter_svm_guest_mode() switches from VMCB01 to VMCB02 but pre-SMM VMCB01 is already lost. - L2's state is restored from SMRAM - upon first exit L1's state is restored from VMCB01 but it is corrupted (reflects the state during 'RSM' execution). VMX doesn't have this problem because unlike VMCB, VMCS keeps both guest and host state so when we switch back to VMCS02 L1's state is intact there. To resolve the issue we need to save L1's state somewhere. We could've created a third VMCB for SMM but that would require us to modify saved state format. L1's architectural HSAVE area (pointed by MSR_VM_HSAVE_PA) seems appropriate: L0 is free to save any (or none) of L1's state there. Currently, KVM does 'none'. Note, for nested state migration to succeed, both source and destination hypervisors must have the fix. We, however, don't need to create a new flag indicating the fact that HSAVE area is now populated as migration during SMM triggered from L2 was always broken. Fixes: 4995a3685f1b ("KVM: SVM: Use a separate vmcb for the nested L2 guest") Signed-off-by: Vitaly Kuznetsov Signed-off-by: Paolo Bonzini --- arch/x86/kvm/svm/svm.c | 39 ++++++++++++++++++++++++++++++++++++++- 1 file changed, 38 insertions(+), 1 deletion(-) diff --git a/arch/x86/kvm/svm/svm.c b/arch/x86/kvm/svm/svm.c index b6e91de891cb52..cf8471890266ac 100644 --- a/arch/x86/kvm/svm/svm.c +++ b/arch/x86/kvm/svm/svm.c @@ -4309,6 +4309,7 @@ static int svm_smi_allowed(struct kvm_vcpu *vcpu, bool for_injection) static int svm_enter_smm(struct kvm_vcpu *vcpu, char *smstate) { struct vcpu_svm *svm = to_svm(vcpu); + struct kvm_host_map map_save; int ret; if (is_guest_mode(vcpu)) { @@ -4324,6 +4325,29 @@ static int svm_enter_smm(struct kvm_vcpu *vcpu, char *smstate) ret = nested_svm_vmexit(svm); if (ret) return ret; + + /* + * KVM uses VMCB01 to store L1 host state while L2 runs but + * VMCB01 is going to be used during SMM and thus the state will + * be lost. Temporary save non-VMLOAD/VMSAVE state to the host save + * area pointed to by MSR_VM_HSAVE_PA. APM guarantees that the + * format of the area is identical to guest save area offsetted + * by 0x400 (matches the offset of 'struct vmcb_save_area' + * within 'struct vmcb'). Note: HSAVE area may also be used by + * L1 hypervisor to save additional host context (e.g. KVM does + * that, see svm_prepare_guest_switch()) which must be + * preserved. + */ + if (kvm_vcpu_map(vcpu, gpa_to_gfn(svm->nested.hsave_msr), + &map_save) == -EINVAL) + return 1; + + BUILD_BUG_ON(offsetof(struct vmcb, save) != 0x400); + + svm_copy_vmrun_state(&svm->vmcb01.ptr->save, + map_save.hva + 0x400); + + kvm_vcpu_unmap(vcpu, &map_save, true); } return 0; } @@ -4331,7 +4355,7 @@ static int svm_enter_smm(struct kvm_vcpu *vcpu, char *smstate) static int svm_leave_smm(struct kvm_vcpu *vcpu, const char *smstate) { struct vcpu_svm *svm = to_svm(vcpu); - struct kvm_host_map map; + struct kvm_host_map map, map_save; int ret = 0; if (guest_cpuid_has(vcpu, X86_FEATURE_LM)) { @@ -4355,6 +4379,19 @@ static int svm_leave_smm(struct kvm_vcpu *vcpu, const char *smstate) ret = enter_svm_guest_mode(vcpu, vmcb12_gpa, map.hva); kvm_vcpu_unmap(vcpu, &map, true); + + /* + * Restore L1 host state from L1 HSAVE area as VMCB01 was + * used during SMM (see svm_enter_smm()) + */ + if (kvm_vcpu_map(vcpu, gpa_to_gfn(svm->nested.hsave_msr), + &map_save) == -EINVAL) + return 1; + + svm_copy_vmrun_state(map_save.hva + 0x400, + &svm->vmcb01.ptr->save); + + kvm_vcpu_unmap(vcpu, &map_save, true); } } From bb00bd9c0862558c6528e3ac97470aee222436ef Mon Sep 17 00:00:00 2001 From: Vitaly Kuznetsov Date: Mon, 28 Jun 2021 12:44:24 +0200 Subject: [PATCH 385/714] KVM: nSVM: Restore nested control upon leaving SMM If the VM was migrated while in SMM, no nested state was saved/restored, and therefore svm_leave_smm has to load both save and control area of the vmcb12. Save area is already loaded from HSAVE area, so now load the control area as well from the vmcb12. Signed-off-by: Vitaly Kuznetsov Message-Id: <20210628104425.391276-6-vkuznets@redhat.com> Signed-off-by: Paolo Bonzini --- arch/x86/kvm/svm/nested.c | 4 ++-- arch/x86/kvm/svm/svm.c | 7 ++++++- arch/x86/kvm/svm/svm.h | 2 ++ 3 files changed, 10 insertions(+), 3 deletions(-) diff --git a/arch/x86/kvm/svm/nested.c b/arch/x86/kvm/svm/nested.c index c4296fb4b8be87..3bd09c50c98b63 100644 --- a/arch/x86/kvm/svm/nested.c +++ b/arch/x86/kvm/svm/nested.c @@ -308,8 +308,8 @@ static bool nested_vmcb_valid_sregs(struct kvm_vcpu *vcpu, return true; } -static void nested_load_control_from_vmcb12(struct vcpu_svm *svm, - struct vmcb_control_area *control) +void nested_load_control_from_vmcb12(struct vcpu_svm *svm, + struct vmcb_control_area *control) { copy_vmcb_control_area(&svm->nested.ctl, control); diff --git a/arch/x86/kvm/svm/svm.c b/arch/x86/kvm/svm/svm.c index cf8471890266ac..664d20f0689c8b 100644 --- a/arch/x86/kvm/svm/svm.c +++ b/arch/x86/kvm/svm/svm.c @@ -4362,6 +4362,7 @@ static int svm_leave_smm(struct kvm_vcpu *vcpu, const char *smstate) u64 saved_efer = GET_SMSTATE(u64, smstate, 0x7ed0); u64 guest = GET_SMSTATE(u64, smstate, 0x7ed8); u64 vmcb12_gpa = GET_SMSTATE(u64, smstate, 0x7ee0); + struct vmcb *vmcb12; if (guest) { if (!guest_cpuid_has(vcpu, X86_FEATURE_SVM)) @@ -4377,7 +4378,11 @@ static int svm_leave_smm(struct kvm_vcpu *vcpu, const char *smstate) if (svm_allocate_nested(svm)) return 1; - ret = enter_svm_guest_mode(vcpu, vmcb12_gpa, map.hva); + vmcb12 = map.hva; + + nested_load_control_from_vmcb12(svm, &vmcb12->control); + + ret = enter_svm_guest_mode(vcpu, vmcb12_gpa, vmcb12); kvm_vcpu_unmap(vcpu, &map, true); /* diff --git a/arch/x86/kvm/svm/svm.h b/arch/x86/kvm/svm/svm.h index fe87fd68b73bb2..7e2090752d8fce 100644 --- a/arch/x86/kvm/svm/svm.h +++ b/arch/x86/kvm/svm/svm.h @@ -482,6 +482,8 @@ int nested_svm_check_permissions(struct kvm_vcpu *vcpu); int nested_svm_check_exception(struct vcpu_svm *svm, unsigned nr, bool has_error_code, u32 error_code); int nested_svm_exit_special(struct vcpu_svm *svm); +void nested_load_control_from_vmcb12(struct vcpu_svm *svm, + struct vmcb_control_area *control); void nested_sync_control_from_vmcb02(struct vcpu_svm *svm); void nested_vmcb02_compute_g_pat(struct vcpu_svm *svm); void svm_switch_vmcb(struct vcpu_svm *svm, struct kvm_vmcb_info *target_vmcb); From d951b2210c1ad2dc08345bb8d97e5a172a15261e Mon Sep 17 00:00:00 2001 From: Vitaly Kuznetsov Date: Mon, 28 Jun 2021 12:44:25 +0200 Subject: [PATCH 386/714] KVM: selftests: smm_test: Test SMM enter from L2 Two additional tests are added: - SMM triggered from L2 does not currupt L1 host state. - Save/restore during SMM triggered from L2 does not corrupt guest/host state. Signed-off-by: Vitaly Kuznetsov Message-Id: <20210628104425.391276-7-vkuznets@redhat.com> Reviewed-by: Maxim Levitsky Signed-off-by: Paolo Bonzini --- tools/testing/selftests/kvm/x86_64/smm_test.c | 70 +++++++++++++++++-- 1 file changed, 64 insertions(+), 6 deletions(-) diff --git a/tools/testing/selftests/kvm/x86_64/smm_test.c b/tools/testing/selftests/kvm/x86_64/smm_test.c index c1f831803ad2df..d0fe2fdce58c47 100644 --- a/tools/testing/selftests/kvm/x86_64/smm_test.c +++ b/tools/testing/selftests/kvm/x86_64/smm_test.c @@ -53,15 +53,28 @@ static inline void sync_with_host(uint64_t phase) : "+a" (phase)); } -void self_smi(void) +static void self_smi(void) { x2apic_write_reg(APIC_ICR, APIC_DEST_SELF | APIC_INT_ASSERT | APIC_DM_SMI); } -void guest_code(void *arg) +static void l2_guest_code(void) { + sync_with_host(8); + + sync_with_host(10); + + vmcall(); +} + +static void guest_code(void *arg) +{ + #define L2_GUEST_STACK_SIZE 64 + unsigned long l2_guest_stack[L2_GUEST_STACK_SIZE]; uint64_t apicbase = rdmsr(MSR_IA32_APICBASE); + struct svm_test_data *svm = arg; + struct vmx_pages *vmx_pages = arg; sync_with_host(1); @@ -74,21 +87,50 @@ void guest_code(void *arg) sync_with_host(4); if (arg) { - if (cpu_has_svm()) - generic_svm_setup(arg, NULL, NULL); - else - GUEST_ASSERT(prepare_for_vmx_operation(arg)); + if (cpu_has_svm()) { + generic_svm_setup(svm, l2_guest_code, + &l2_guest_stack[L2_GUEST_STACK_SIZE]); + } else { + GUEST_ASSERT(prepare_for_vmx_operation(vmx_pages)); + GUEST_ASSERT(load_vmcs(vmx_pages)); + prepare_vmcs(vmx_pages, l2_guest_code, + &l2_guest_stack[L2_GUEST_STACK_SIZE]); + } sync_with_host(5); self_smi(); sync_with_host(7); + + if (cpu_has_svm()) { + run_guest(svm->vmcb, svm->vmcb_gpa); + svm->vmcb->save.rip += 3; + run_guest(svm->vmcb, svm->vmcb_gpa); + } else { + vmlaunch(); + vmresume(); + } + + /* Stages 8-11 are eaten by SMM (SMRAM_STAGE reported instead) */ + sync_with_host(12); } sync_with_host(DONE); } +void inject_smi(struct kvm_vm *vm) +{ + struct kvm_vcpu_events events; + + vcpu_events_get(vm, VCPU_ID, &events); + + events.smi.pending = 1; + events.flags |= KVM_VCPUEVENT_VALID_SMM; + + vcpu_events_set(vm, VCPU_ID, &events); +} + int main(int argc, char *argv[]) { vm_vaddr_t nested_gva = 0; @@ -147,6 +189,22 @@ int main(int argc, char *argv[]) "Unexpected stage: #%x, got %x", stage, stage_reported); + /* + * Enter SMM during L2 execution and check that we correctly + * return from it. Do not perform save/restore while in SMM yet. + */ + if (stage == 8) { + inject_smi(vm); + continue; + } + + /* + * Perform save/restore while the guest is in SMM triggered + * during L2 execution. + */ + if (stage == 10) + inject_smi(vm); + state = vcpu_save_state(vm, VCPU_ID); kvm_vm_release(vm); kvm_vm_restart(vm, O_RDWR); From e8917266ae0944385d50da5e691c89f64c5975a3 Mon Sep 17 00:00:00 2001 From: Rob Herring Date: Tue, 13 Jul 2021 13:34:53 -0600 Subject: [PATCH 387/714] dt-bindings: More dropping redundant minItems/maxItems Another round of removing redundant minItems/maxItems from new schema in the recent merge window. If a property has an 'items' list, then a 'minItems' or 'maxItems' with the same size as the list is redundant and can be dropped. Note that is DT schema specific behavior and not standard json-schema behavior. The tooling will fixup the final schema adding any unspecified minItems/maxItems. This condition is partially checked with the meta-schema already, but only if both 'minItems' and 'maxItems' are equal to the 'items' length. An improved meta-schema is pending. Cc: Stephen Boyd Cc: Joerg Roedel Cc: Will Deacon Cc: Krzysztof Kozlowski Cc: Miquel Raynal Cc: Richard Weinberger Cc: Vignesh Raghavendra Cc: Alessandro Zummo Cc: Alexandre Belloni Cc: Greg Kroah-Hartman Cc: Sureshkumar Relli Cc: Brian Norris Cc: Kamal Dasu Cc: Linus Walleij Cc: Sebastian Siewior Cc: Laurent Pinchart Cc: linux-clk@vger.kernel.org Cc: iommu@lists.linux-foundation.org Cc: linux-mtd@lists.infradead.org Cc: linux-rtc@vger.kernel.org Cc: linux-usb@vger.kernel.org Signed-off-by: Rob Herring Reviewed-by: Greg Kroah-Hartman Acked-by: Alexandre Belloni Reviewed-by: Laurent Pinchart Link: https://lore.kernel.org/r/20210713193453.690290-1-robh@kernel.org --- .../devicetree/bindings/clock/brcm,iproc-clocks.yaml | 1 - .../devicetree/bindings/iommu/rockchip,iommu.yaml | 2 -- .../bindings/memory-controllers/arm,pl353-smc.yaml | 1 - Documentation/devicetree/bindings/mtd/brcm,brcmnand.yaml | 8 -------- .../devicetree/bindings/rtc/faraday,ftrtc010.yaml | 1 - Documentation/devicetree/bindings/usb/nxp,isp1760.yaml | 2 -- 6 files changed, 15 deletions(-) diff --git a/Documentation/devicetree/bindings/clock/brcm,iproc-clocks.yaml b/Documentation/devicetree/bindings/clock/brcm,iproc-clocks.yaml index 8dc7b404ee12b1..1174c9aa99340f 100644 --- a/Documentation/devicetree/bindings/clock/brcm,iproc-clocks.yaml +++ b/Documentation/devicetree/bindings/clock/brcm,iproc-clocks.yaml @@ -50,7 +50,6 @@ properties: reg: minItems: 1 - maxItems: 3 items: - description: base register - description: power register diff --git a/Documentation/devicetree/bindings/iommu/rockchip,iommu.yaml b/Documentation/devicetree/bindings/iommu/rockchip,iommu.yaml index d2e28a9e354513..ba9124f721f151 100644 --- a/Documentation/devicetree/bindings/iommu/rockchip,iommu.yaml +++ b/Documentation/devicetree/bindings/iommu/rockchip,iommu.yaml @@ -28,14 +28,12 @@ properties: - description: configuration registers for MMU instance 0 - description: configuration registers for MMU instance 1 minItems: 1 - maxItems: 2 interrupts: items: - description: interruption for MMU instance 0 - description: interruption for MMU instance 1 minItems: 1 - maxItems: 2 clocks: items: diff --git a/Documentation/devicetree/bindings/memory-controllers/arm,pl353-smc.yaml b/Documentation/devicetree/bindings/memory-controllers/arm,pl353-smc.yaml index 7a63c85ef8c586..01c9acf9275df8 100644 --- a/Documentation/devicetree/bindings/memory-controllers/arm,pl353-smc.yaml +++ b/Documentation/devicetree/bindings/memory-controllers/arm,pl353-smc.yaml @@ -57,7 +57,6 @@ properties: ranges: minItems: 1 - maxItems: 3 description: | Memory bus areas for interacting with the devices. Reflects the memory layout with four integer values following: diff --git a/Documentation/devicetree/bindings/mtd/brcm,brcmnand.yaml b/Documentation/devicetree/bindings/mtd/brcm,brcmnand.yaml index e5f1a33332a5af..dd5a64969e3784 100644 --- a/Documentation/devicetree/bindings/mtd/brcm,brcmnand.yaml +++ b/Documentation/devicetree/bindings/mtd/brcm,brcmnand.yaml @@ -84,7 +84,6 @@ properties: interrupts: minItems: 1 - maxItems: 3 items: - description: NAND CTLRDY interrupt - description: FLASH_DMA_DONE if flash DMA is available @@ -92,7 +91,6 @@ properties: interrupt-names: minItems: 1 - maxItems: 3 items: - const: nand_ctlrdy - const: flash_dma_done @@ -148,8 +146,6 @@ allOf: then: properties: reg-names: - minItems: 2 - maxItems: 2 items: - const: nand - const: nand-int-base @@ -161,8 +157,6 @@ allOf: then: properties: reg-names: - minItems: 3 - maxItems: 3 items: - const: nand - const: nand-int-base @@ -175,8 +169,6 @@ allOf: then: properties: reg-names: - minItems: 3 - maxItems: 3 items: - const: nand - const: iproc-idm diff --git a/Documentation/devicetree/bindings/rtc/faraday,ftrtc010.yaml b/Documentation/devicetree/bindings/rtc/faraday,ftrtc010.yaml index 657c13b62b6793..056d42daae06da 100644 --- a/Documentation/devicetree/bindings/rtc/faraday,ftrtc010.yaml +++ b/Documentation/devicetree/bindings/rtc/faraday,ftrtc010.yaml @@ -30,7 +30,6 @@ properties: maxItems: 1 clocks: - minItems: 2 items: - description: PCLK clocks - description: EXTCLK clocks. Faraday calls it CLK1HZ and says the clock diff --git a/Documentation/devicetree/bindings/usb/nxp,isp1760.yaml b/Documentation/devicetree/bindings/usb/nxp,isp1760.yaml index a88f99adfe8ec2..f238848ad094d8 100644 --- a/Documentation/devicetree/bindings/usb/nxp,isp1760.yaml +++ b/Documentation/devicetree/bindings/usb/nxp,isp1760.yaml @@ -25,14 +25,12 @@ properties: interrupts: minItems: 1 - maxItems: 2 items: - description: Host controller interrupt - description: Device controller interrupt in isp1761 interrupt-names: minItems: 1 - maxItems: 2 items: - const: host - const: peripheral From f88321a3bf775649c685379a25fb9f3f79836bfd Mon Sep 17 00:00:00 2001 From: Rob Herring Date: Tue, 13 Jul 2021 13:35:14 -0600 Subject: [PATCH 388/714] dt-bindings: Move fixed string 'patternProperties' to 'properties' There's no need for fixed strings to be under 'patternProperties', so move them under 'properties' instead. Cc: Jean Delvare Cc: Guenter Roeck Cc: Kishon Vijay Abraham I Cc: Vinod Koul Cc: Saravanan Sekar Cc: Mark Brown Cc: Jagan Teki Cc: Troy Kisky Cc: linux-hwmon@vger.kernel.org Cc: linux-phy@lists.infradead.org Cc: linux-spi@vger.kernel.org Signed-off-by: Rob Herring Acked-by: Mark Brown Acked-by: Guenter Roeck Link: https://lore.kernel.org/r/20210713193514.690894-1-robh@kernel.org --- .../devicetree/bindings/hwmon/adt7475.yaml | 22 ++++---- .../bindings/phy/ti,phy-j721e-wiz.yaml | 56 +++++++++---------- .../bindings/regulator/mps,mpq7920.yaml | 6 +- .../regulator/nxp,pf8x00-regulator.yaml | 3 +- .../bindings/spi/spi-controller.yaml | 32 +++++------ 5 files changed, 60 insertions(+), 59 deletions(-) diff --git a/Documentation/devicetree/bindings/hwmon/adt7475.yaml b/Documentation/devicetree/bindings/hwmon/adt7475.yaml index ad0ec9f35bd8ea..7d9c083632b941 100644 --- a/Documentation/devicetree/bindings/hwmon/adt7475.yaml +++ b/Documentation/devicetree/bindings/hwmon/adt7475.yaml @@ -39,17 +39,7 @@ properties: reg: maxItems: 1 -patternProperties: - "^adi,bypass-attenuator-in[0-4]$": - description: | - Configures bypassing the individual voltage input attenuator. If - set to 1 the attenuator is bypassed if set to 0 the attenuator is - not bypassed. If the property is absent then the attenuator - retains it's configuration from the bios/bootloader. - $ref: /schemas/types.yaml#/definitions/uint32 - enum: [0, 1] - - "^adi,pwm-active-state$": + adi,pwm-active-state: description: | Integer array, represents the active state of the pwm outputs If set to 0 the pwm uses a logic low output for 100% duty cycle. If set to 1 the pwm @@ -61,6 +51,16 @@ patternProperties: enum: [0, 1] default: 1 +patternProperties: + "^adi,bypass-attenuator-in[0-4]$": + description: | + Configures bypassing the individual voltage input attenuator. If + set to 1 the attenuator is bypassed if set to 0 the attenuator is + not bypassed. If the property is absent then the attenuator + retains it's configuration from the bios/bootloader. + $ref: /schemas/types.yaml#/definitions/uint32 + enum: [0, 1] + required: - compatible - reg diff --git a/Documentation/devicetree/bindings/phy/ti,phy-j721e-wiz.yaml b/Documentation/devicetree/bindings/phy/ti,phy-j721e-wiz.yaml index 5272b6f284ba24..dcd63908aeaefe 100644 --- a/Documentation/devicetree/bindings/phy/ti,phy-j721e-wiz.yaml +++ b/Documentation/devicetree/bindings/phy/ti,phy-j721e-wiz.yaml @@ -77,6 +77,34 @@ properties: Type-C spec states minimum CC pin debounce of 100 ms and maximum of 200 ms. However, some solutions might need more than 200 ms. + refclk-dig: + type: object + description: | + WIZ node should have subnode for refclk_dig to select the reference + clock source for the reference clock used in the PHY and PMA digital + logic. + properties: + clocks: + minItems: 2 + maxItems: 4 + description: Phandle to two (Torrent) or four (Sierra) clock nodes representing + the inputs to refclk_dig + + "#clock-cells": + const: 0 + + assigned-clocks: + maxItems: 1 + + assigned-clock-parents: + maxItems: 1 + + required: + - clocks + - "#clock-cells" + - assigned-clocks + - assigned-clock-parents + patternProperties: "^pll[0|1]-refclk$": type: object @@ -121,34 +149,6 @@ patternProperties: - clocks - "#clock-cells" - "^refclk-dig$": - type: object - description: | - WIZ node should have subnode for refclk_dig to select the reference - clock source for the reference clock used in the PHY and PMA digital - logic. - properties: - clocks: - minItems: 2 - maxItems: 4 - description: Phandle to two (Torrent) or four (Sierra) clock nodes representing - the inputs to refclk_dig - - "#clock-cells": - const: 0 - - assigned-clocks: - maxItems: 1 - - assigned-clock-parents: - maxItems: 1 - - required: - - clocks - - "#clock-cells" - - assigned-clocks - - assigned-clock-parents - "^serdes@[0-9a-f]+$": type: object description: | diff --git a/Documentation/devicetree/bindings/regulator/mps,mpq7920.yaml b/Documentation/devicetree/bindings/regulator/mps,mpq7920.yaml index 12b8963615c381..c2e8c54e531121 100644 --- a/Documentation/devicetree/bindings/regulator/mps,mpq7920.yaml +++ b/Documentation/devicetree/bindings/regulator/mps,mpq7920.yaml @@ -36,12 +36,12 @@ properties: switching frequency must be one of following corresponding value 1.1MHz, 1.65MHz, 2.2MHz, 2.75MHz - patternProperties: - "^ldo[1-4]$": + ldortc: type: object $ref: regulator.yaml# - "^ldortc$": + patternProperties: + "^ldo[1-4]$": type: object $ref: regulator.yaml# diff --git a/Documentation/devicetree/bindings/regulator/nxp,pf8x00-regulator.yaml b/Documentation/devicetree/bindings/regulator/nxp,pf8x00-regulator.yaml index 8761437ed8adc3..aabf50f5b39e97 100644 --- a/Documentation/devicetree/bindings/regulator/nxp,pf8x00-regulator.yaml +++ b/Documentation/devicetree/bindings/regulator/nxp,pf8x00-regulator.yaml @@ -83,7 +83,8 @@ properties: unevaluatedProperties: false - "^vsnvs$": + properties: + vsnvs: type: object $ref: regulator.yaml# description: diff --git a/Documentation/devicetree/bindings/spi/spi-controller.yaml b/Documentation/devicetree/bindings/spi/spi-controller.yaml index faef4f6f55b851..8246891602e776 100644 --- a/Documentation/devicetree/bindings/spi/spi-controller.yaml +++ b/Documentation/devicetree/bindings/spi/spi-controller.yaml @@ -79,22 +79,7 @@ properties: description: The SPI controller acts as a slave, instead of a master. -allOf: - - if: - not: - required: - - spi-slave - then: - properties: - "#address-cells": - const: 1 - else: - properties: - "#address-cells": - const: 0 - -patternProperties: - "^slave$": + slave: type: object properties: @@ -105,6 +90,7 @@ patternProperties: required: - compatible +patternProperties: "^.*@[0-9a-f]+$": type: object @@ -180,6 +166,20 @@ patternProperties: - compatible - reg +allOf: + - if: + not: + required: + - spi-slave + then: + properties: + "#address-cells": + const: 1 + else: + properties: + "#address-cells": + const: 0 + additionalProperties: true examples: From 58b63e0f556c2debb8c942abcc9e6beadc4a07f0 Mon Sep 17 00:00:00 2001 From: Guoqing Jiang Date: Tue, 6 Jul 2021 09:07:34 +0800 Subject: [PATCH 389/714] pd: fix order of cleaning up the queue and freeing the tagset We must release the queue before freeing the tagset. Fixes: 262d431f9000 ("pd: use blk_mq_alloc_disk and blk_cleanup_disk") Signed-off-by: Guoqing Jiang Reviewed-by: Christoph Hellwig Link: https://lore.kernel.org/r/20210706010734.1356066-1-guoqing.jiang@linux.dev Signed-off-by: Jens Axboe --- drivers/block/paride/pd.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/block/paride/pd.c b/drivers/block/paride/pd.c index 3b2b8e872beb6e..9b3298926356d8 100644 --- a/drivers/block/paride/pd.c +++ b/drivers/block/paride/pd.c @@ -1014,8 +1014,8 @@ static void __exit pd_exit(void) if (p) { disk->gd = NULL; del_gendisk(p); - blk_mq_free_tag_set(&disk->tag_set); blk_cleanup_disk(p); + blk_mq_free_tag_set(&disk->tag_set); pi_release(disk->pi); } } From 16ad3db3b24cd9f70aa24e93cef0d4a83dece7ac Mon Sep 17 00:00:00 2001 From: Wang Qing Date: Tue, 6 Jul 2021 12:00:16 +0800 Subject: [PATCH 390/714] nbd: fix order of cleaning up the queue and freeing the tagset We must release the queue before freeing the tagset. Fixes: 4af5f2e03013 ("nbd: use blk_mq_alloc_disk and blk_cleanup_disk") Reported-and-tested-by: syzbot+9ca43ff47167c0ee3466@syzkaller.appspotmail.com Signed-off-by: Wang Qing Signed-off-by: Guoqing Jiang Reviewed-by: Christoph Hellwig Link: https://lore.kernel.org/r/20210706040016.1360412-1-guoqing.jiang@linux.dev Signed-off-by: Jens Axboe --- drivers/block/nbd.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/block/nbd.c b/drivers/block/nbd.c index b7d663736d35b3..c38317979f74ec 100644 --- a/drivers/block/nbd.c +++ b/drivers/block/nbd.c @@ -239,8 +239,8 @@ static void nbd_dev_remove(struct nbd_device *nbd) if (disk) { del_gendisk(disk); - blk_mq_free_tag_set(&nbd->tag_set); blk_cleanup_disk(disk); + blk_mq_free_tag_set(&nbd->tag_set); } /* From 05d69d950d9d84218fc9beafd02dea1f6a70e09e Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Thu, 15 Jul 2021 16:17:11 +0200 Subject: [PATCH 391/714] xen-blkfront: sanitize the removal state machine xen-blkfront has a weird protocol where close message from the remote side can be delayed, and where hot removals are treated somewhat differently from regular removals, all leading to potential NULL pointer removals, and a del_gendisk from the block device release method, which will deadlock. Fix this by just performing normal hot removals even when the device is opened like all other Linux block drivers. Fixes: c76f48eb5c08 ("block: take bd_mutex around delete_partitions in del_gendisk") Reported-by: Vitaly Kuznetsov Signed-off-by: Christoph Hellwig Tested-by: Vitaly Kuznetsov Link: https://lore.kernel.org/r/20210715141711.1257293-1-hch@lst.de Signed-off-by: Jens Axboe --- drivers/block/xen-blkfront.c | 224 ++++------------------------------- 1 file changed, 26 insertions(+), 198 deletions(-) diff --git a/drivers/block/xen-blkfront.c b/drivers/block/xen-blkfront.c index 8d49f8fa98bbe6..d83fee21f6c59c 100644 --- a/drivers/block/xen-blkfront.c +++ b/drivers/block/xen-blkfront.c @@ -502,34 +502,21 @@ static int blkif_getgeo(struct block_device *bd, struct hd_geometry *hg) static int blkif_ioctl(struct block_device *bdev, fmode_t mode, unsigned command, unsigned long argument) { - struct blkfront_info *info = bdev->bd_disk->private_data; int i; - dev_dbg(&info->xbdev->dev, "command: 0x%x, argument: 0x%lx\n", - command, (long)argument); - switch (command) { case CDROMMULTISESSION: - dev_dbg(&info->xbdev->dev, "FIXME: support multisession CDs later\n"); for (i = 0; i < sizeof(struct cdrom_multisession); i++) if (put_user(0, (char __user *)(argument + i))) return -EFAULT; return 0; - - case CDROM_GET_CAPABILITY: { - struct gendisk *gd = info->gd; - if (gd->flags & GENHD_FL_CD) + case CDROM_GET_CAPABILITY: + if (bdev->bd_disk->flags & GENHD_FL_CD) return 0; return -EINVAL; - } - default: - /*printk(KERN_ALERT "ioctl %08x not supported by Xen blkdev\n", - command);*/ - return -EINVAL; /* same return as native Linux */ + return -EINVAL; } - - return 0; } static unsigned long blkif_ring_get_request(struct blkfront_ring_info *rinfo, @@ -1177,36 +1164,6 @@ static int xlvbd_alloc_gendisk(blkif_sector_t capacity, return err; } -static void xlvbd_release_gendisk(struct blkfront_info *info) -{ - unsigned int minor, nr_minors, i; - struct blkfront_ring_info *rinfo; - - if (info->rq == NULL) - return; - - /* No more blkif_request(). */ - blk_mq_stop_hw_queues(info->rq); - - for_each_rinfo(info, rinfo, i) { - /* No more gnttab callback work. */ - gnttab_cancel_free_callback(&rinfo->callback); - - /* Flush gnttab callback work. Must be done with no locks held. */ - flush_work(&rinfo->work); - } - - del_gendisk(info->gd); - - minor = info->gd->first_minor; - nr_minors = info->gd->minors; - xlbd_release_minors(minor, nr_minors); - - blk_cleanup_disk(info->gd); - info->gd = NULL; - blk_mq_free_tag_set(&info->tag_set); -} - /* Already hold rinfo->ring_lock. */ static inline void kick_pending_request_queues_locked(struct blkfront_ring_info *rinfo) { @@ -1756,12 +1713,6 @@ static int write_per_ring_nodes(struct xenbus_transaction xbt, return err; } -static void free_info(struct blkfront_info *info) -{ - list_del(&info->info_list); - kfree(info); -} - /* Common code used when first setting up, and when resuming. */ static int talk_to_blkback(struct xenbus_device *dev, struct blkfront_info *info) @@ -1880,13 +1831,6 @@ static int talk_to_blkback(struct xenbus_device *dev, xenbus_dev_fatal(dev, err, "%s", message); destroy_blkring: blkif_free(info, 0); - - mutex_lock(&blkfront_mutex); - free_info(info); - mutex_unlock(&blkfront_mutex); - - dev_set_drvdata(&dev->dev, NULL); - return err; } @@ -2126,38 +2070,26 @@ static int blkfront_resume(struct xenbus_device *dev) static void blkfront_closing(struct blkfront_info *info) { struct xenbus_device *xbdev = info->xbdev; - struct block_device *bdev = NULL; - - mutex_lock(&info->mutex); + struct blkfront_ring_info *rinfo; + unsigned int i; - if (xbdev->state == XenbusStateClosing) { - mutex_unlock(&info->mutex); + if (xbdev->state == XenbusStateClosing) return; - } - if (info->gd) - bdev = bdgrab(info->gd->part0); - - mutex_unlock(&info->mutex); - - if (!bdev) { - xenbus_frontend_closed(xbdev); - return; - } + /* No more blkif_request(). */ + blk_mq_stop_hw_queues(info->rq); + blk_set_queue_dying(info->rq); + set_capacity(info->gd, 0); - mutex_lock(&bdev->bd_disk->open_mutex); + for_each_rinfo(info, rinfo, i) { + /* No more gnttab callback work. */ + gnttab_cancel_free_callback(&rinfo->callback); - if (bdev->bd_openers) { - xenbus_dev_error(xbdev, -EBUSY, - "Device in use; refusing to close"); - xenbus_switch_state(xbdev, XenbusStateClosing); - } else { - xlvbd_release_gendisk(info); - xenbus_frontend_closed(xbdev); + /* Flush gnttab callback work. Must be done with no locks held. */ + flush_work(&rinfo->work); } - mutex_unlock(&bdev->bd_disk->open_mutex); - bdput(bdev); + xenbus_frontend_closed(xbdev); } static void blkfront_setup_discard(struct blkfront_info *info) @@ -2472,8 +2404,7 @@ static void blkback_changed(struct xenbus_device *dev, break; fallthrough; case XenbusStateClosing: - if (info) - blkfront_closing(info); + blkfront_closing(info); break; } } @@ -2481,56 +2412,21 @@ static void blkback_changed(struct xenbus_device *dev, static int blkfront_remove(struct xenbus_device *xbdev) { struct blkfront_info *info = dev_get_drvdata(&xbdev->dev); - struct block_device *bdev = NULL; - struct gendisk *disk; dev_dbg(&xbdev->dev, "%s removed", xbdev->nodename); - if (!info) - return 0; - - blkif_free(info, 0); - - mutex_lock(&info->mutex); - - disk = info->gd; - if (disk) - bdev = bdgrab(disk->part0); - - info->xbdev = NULL; - mutex_unlock(&info->mutex); - - if (!bdev) { - mutex_lock(&blkfront_mutex); - free_info(info); - mutex_unlock(&blkfront_mutex); - return 0; - } - - /* - * The xbdev was removed before we reached the Closed - * state. See if it's safe to remove the disk. If the bdev - * isn't closed yet, we let release take care of it. - */ - - mutex_lock(&disk->open_mutex); - info = disk->private_data; - - dev_warn(disk_to_dev(disk), - "%s was hot-unplugged, %d stale handles\n", - xbdev->nodename, bdev->bd_openers); + del_gendisk(info->gd); - if (info && !bdev->bd_openers) { - xlvbd_release_gendisk(info); - disk->private_data = NULL; - mutex_lock(&blkfront_mutex); - free_info(info); - mutex_unlock(&blkfront_mutex); - } + mutex_lock(&blkfront_mutex); + list_del(&info->info_list); + mutex_unlock(&blkfront_mutex); - mutex_unlock(&disk->open_mutex); - bdput(bdev); + blkif_free(info, 0); + xlbd_release_minors(info->gd->first_minor, info->gd->minors); + blk_cleanup_disk(info->gd); + blk_mq_free_tag_set(&info->tag_set); + kfree(info); return 0; } @@ -2541,77 +2437,9 @@ static int blkfront_is_ready(struct xenbus_device *dev) return info->is_ready && info->xbdev; } -static int blkif_open(struct block_device *bdev, fmode_t mode) -{ - struct gendisk *disk = bdev->bd_disk; - struct blkfront_info *info; - int err = 0; - - mutex_lock(&blkfront_mutex); - - info = disk->private_data; - if (!info) { - /* xbdev gone */ - err = -ERESTARTSYS; - goto out; - } - - mutex_lock(&info->mutex); - - if (!info->gd) - /* xbdev is closed */ - err = -ERESTARTSYS; - - mutex_unlock(&info->mutex); - -out: - mutex_unlock(&blkfront_mutex); - return err; -} - -static void blkif_release(struct gendisk *disk, fmode_t mode) -{ - struct blkfront_info *info = disk->private_data; - struct xenbus_device *xbdev; - - mutex_lock(&blkfront_mutex); - if (disk->part0->bd_openers) - goto out_mutex; - - /* - * Check if we have been instructed to close. We will have - * deferred this request, because the bdev was still open. - */ - - mutex_lock(&info->mutex); - xbdev = info->xbdev; - - if (xbdev && xbdev->state == XenbusStateClosing) { - /* pending switch to state closed */ - dev_info(disk_to_dev(disk), "releasing disk\n"); - xlvbd_release_gendisk(info); - xenbus_frontend_closed(info->xbdev); - } - - mutex_unlock(&info->mutex); - - if (!xbdev) { - /* sudden device removal */ - dev_info(disk_to_dev(disk), "releasing disk\n"); - xlvbd_release_gendisk(info); - disk->private_data = NULL; - free_info(info); - } - -out_mutex: - mutex_unlock(&blkfront_mutex); -} - static const struct block_device_operations xlvbd_block_fops = { .owner = THIS_MODULE, - .open = blkif_open, - .release = blkif_release, .getgeo = blkif_getgeo, .ioctl = blkif_ioctl, .compat_ioctl = blkdev_compat_ptr_ioctl, From 295cf156231ca3f9e3a66bde7fab5e09c41835e0 Mon Sep 17 00:00:00 2001 From: Robin Murphy Date: Mon, 12 Jul 2021 15:27:46 +0100 Subject: [PATCH 392/714] arm64: Avoid premature usercopy failure Al reminds us that the usercopy API must only return complete failure if absolutely nothing could be copied. Currently, if userspace does something silly like giving us an unaligned pointer to Device memory, or a size which overruns MTE tag bounds, we may fail to honour that requirement when faulting on a multi-byte access even though a smaller access could have succeeded. Add a mitigation to the fixup routines to fall back to a single-byte copy if we faulted on a larger access before anything has been written to the destination, to guarantee making *some* forward progress. We needn't be too concerned about the overall performance since this should only occur when callers are doing something a bit dodgy in the first place. Particularly broken userspace might still be able to trick generic_perform_write() into an infinite loop by targeting write() at an mmap() of some read-only device register where the fault-in load succeeds but any store synchronously aborts such that copy_to_user() is genuinely unable to make progress, but, well, don't do that... CC: stable@vger.kernel.org Reported-by: Chen Huang Suggested-by: Al Viro Reviewed-by: Catalin Marinas Signed-off-by: Robin Murphy Link: https://lore.kernel.org/r/dc03d5c675731a1f24a62417dba5429ad744234e.1626098433.git.robin.murphy@arm.com Signed-off-by: Will Deacon --- arch/arm64/lib/copy_from_user.S | 13 ++++++++++--- arch/arm64/lib/copy_in_user.S | 21 ++++++++++++++------- arch/arm64/lib/copy_to_user.S | 14 +++++++++++--- 3 files changed, 35 insertions(+), 13 deletions(-) diff --git a/arch/arm64/lib/copy_from_user.S b/arch/arm64/lib/copy_from_user.S index 95cd62d6737113..2cf999e41d30e7 100644 --- a/arch/arm64/lib/copy_from_user.S +++ b/arch/arm64/lib/copy_from_user.S @@ -29,7 +29,7 @@ .endm .macro ldrh1 reg, ptr, val - user_ldst 9998f, ldtrh, \reg, \ptr, \val + user_ldst 9997f, ldtrh, \reg, \ptr, \val .endm .macro strh1 reg, ptr, val @@ -37,7 +37,7 @@ .endm .macro ldr1 reg, ptr, val - user_ldst 9998f, ldtr, \reg, \ptr, \val + user_ldst 9997f, ldtr, \reg, \ptr, \val .endm .macro str1 reg, ptr, val @@ -45,7 +45,7 @@ .endm .macro ldp1 reg1, reg2, ptr, val - user_ldp 9998f, \reg1, \reg2, \ptr, \val + user_ldp 9997f, \reg1, \reg2, \ptr, \val .endm .macro stp1 reg1, reg2, ptr, val @@ -53,8 +53,10 @@ .endm end .req x5 +srcin .req x15 SYM_FUNC_START(__arch_copy_from_user) add end, x0, x2 + mov srcin, x1 #include "copy_template.S" mov x0, #0 // Nothing to copy ret @@ -63,6 +65,11 @@ EXPORT_SYMBOL(__arch_copy_from_user) .section .fixup,"ax" .align 2 +9997: cmp dst, dstin + b.ne 9998f + // Before being absolutely sure we couldn't copy anything, try harder +USER(9998f, ldtrb tmp1w, [srcin]) + strb tmp1w, [dst], #1 9998: sub x0, end, dst // bytes not copied ret .previous diff --git a/arch/arm64/lib/copy_in_user.S b/arch/arm64/lib/copy_in_user.S index 1f61cd0df0627b..dbea3799c3efb6 100644 --- a/arch/arm64/lib/copy_in_user.S +++ b/arch/arm64/lib/copy_in_user.S @@ -30,33 +30,34 @@ .endm .macro ldrh1 reg, ptr, val - user_ldst 9998f, ldtrh, \reg, \ptr, \val + user_ldst 9997f, ldtrh, \reg, \ptr, \val .endm .macro strh1 reg, ptr, val - user_ldst 9998f, sttrh, \reg, \ptr, \val + user_ldst 9997f, sttrh, \reg, \ptr, \val .endm .macro ldr1 reg, ptr, val - user_ldst 9998f, ldtr, \reg, \ptr, \val + user_ldst 9997f, ldtr, \reg, \ptr, \val .endm .macro str1 reg, ptr, val - user_ldst 9998f, sttr, \reg, \ptr, \val + user_ldst 9997f, sttr, \reg, \ptr, \val .endm .macro ldp1 reg1, reg2, ptr, val - user_ldp 9998f, \reg1, \reg2, \ptr, \val + user_ldp 9997f, \reg1, \reg2, \ptr, \val .endm .macro stp1 reg1, reg2, ptr, val - user_stp 9998f, \reg1, \reg2, \ptr, \val + user_stp 9997f, \reg1, \reg2, \ptr, \val .endm end .req x5 - +srcin .req x15 SYM_FUNC_START(__arch_copy_in_user) add end, x0, x2 + mov srcin, x1 #include "copy_template.S" mov x0, #0 ret @@ -65,6 +66,12 @@ EXPORT_SYMBOL(__arch_copy_in_user) .section .fixup,"ax" .align 2 +9997: cmp dst, dstin + b.ne 9998f + // Before being absolutely sure we couldn't copy anything, try harder +USER(9998f, ldtrb tmp1w, [srcin]) +USER(9998f, sttrb tmp1w, [dst]) + add dst, dst, #1 9998: sub x0, end, dst // bytes not copied ret .previous diff --git a/arch/arm64/lib/copy_to_user.S b/arch/arm64/lib/copy_to_user.S index 043da90f5dd7df..9f380eecf65317 100644 --- a/arch/arm64/lib/copy_to_user.S +++ b/arch/arm64/lib/copy_to_user.S @@ -32,7 +32,7 @@ .endm .macro strh1 reg, ptr, val - user_ldst 9998f, sttrh, \reg, \ptr, \val + user_ldst 9997f, sttrh, \reg, \ptr, \val .endm .macro ldr1 reg, ptr, val @@ -40,7 +40,7 @@ .endm .macro str1 reg, ptr, val - user_ldst 9998f, sttr, \reg, \ptr, \val + user_ldst 9997f, sttr, \reg, \ptr, \val .endm .macro ldp1 reg1, reg2, ptr, val @@ -48,12 +48,14 @@ .endm .macro stp1 reg1, reg2, ptr, val - user_stp 9998f, \reg1, \reg2, \ptr, \val + user_stp 9997f, \reg1, \reg2, \ptr, \val .endm end .req x5 +srcin .req x15 SYM_FUNC_START(__arch_copy_to_user) add end, x0, x2 + mov srcin, x1 #include "copy_template.S" mov x0, #0 ret @@ -62,6 +64,12 @@ EXPORT_SYMBOL(__arch_copy_to_user) .section .fixup,"ax" .align 2 +9997: cmp dst, dstin + b.ne 9998f + // Before being absolutely sure we couldn't copy anything, try harder + ldrb tmp1w, [srcin] +USER(9998f, sttrb tmp1w, [dst]) + add dst, dst, #1 9998: sub x0, end, dst // bytes not copied ret .previous From 59f44069e0527523f27948da7b77599a73dab157 Mon Sep 17 00:00:00 2001 From: Mark Rutland Date: Wed, 14 Jul 2021 15:38:41 +0100 Subject: [PATCH 393/714] arm64: mte: fix restoration of GCR_EL1 from suspend Since commit: bad1e1c663e0a72f ("arm64: mte: switch GCR_EL1 in kernel entry and exit") we saved/restored the user GCR_EL1 value at exception boundaries, and update_gcr_el1_excl() is no longer used for this. However it is used to restore the kernel's GCR_EL1 value when returning from a suspend state. Thus, the comment is misleading (and an ISB is necessary). When restoring the kernel's GCR value, we need an ISB to ensure this is used by subsequent instructions. We don't necessarily get an ISB by other means (e.g. if the kernel is built without support for pointer authentication). As __cpu_setup() initialised GCR_EL1.Exclude to 0xffff, until a context synchronization event, allocation tag 0 may be used rather than the desired set of tags. This patch drops the misleading comment, adds the missing ISB, and for clarity folds update_gcr_el1_excl() into its only user. Fixes: bad1e1c663e0 ("arm64: mte: switch GCR_EL1 in kernel entry and exit") Signed-off-by: Mark Rutland Cc: Andrey Konovalov Cc: Catalin Marinas Cc: Vincenzo Frascino Cc: Will Deacon Link: https://lore.kernel.org/r/20210714143843.56537-2-mark.rutland@arm.com Signed-off-by: Will Deacon --- arch/arm64/kernel/mte.c | 15 ++------------- 1 file changed, 2 insertions(+), 13 deletions(-) diff --git a/arch/arm64/kernel/mte.c b/arch/arm64/kernel/mte.c index 69b3fde8759e40..36f51b0e438a6e 100644 --- a/arch/arm64/kernel/mte.c +++ b/arch/arm64/kernel/mte.c @@ -193,18 +193,6 @@ void mte_check_tfsr_el1(void) } #endif -static void update_gcr_el1_excl(u64 excl) -{ - - /* - * Note that the mask controlled by the user via prctl() is an - * include while GCR_EL1 accepts an exclude mask. - * No need for ISB since this only affects EL0 currently, implicit - * with ERET. - */ - sysreg_clear_set_s(SYS_GCR_EL1, SYS_GCR_EL1_EXCL_MASK, excl); -} - static void set_gcr_el1_excl(u64 excl) { current->thread.gcr_user_excl = excl; @@ -265,7 +253,8 @@ void mte_suspend_exit(void) if (!system_supports_mte()) return; - update_gcr_el1_excl(gcr_kernel_excl); + sysreg_clear_set_s(SYS_GCR_EL1, SYS_GCR_EL1_EXCL_MASK, gcr_kernel_excl); + isb(); } long set_mte_ctrl(struct task_struct *task, unsigned long arg) From 31a7f0f6c8f392f002c937f34f372943cf8be5a9 Mon Sep 17 00:00:00 2001 From: Mark Rutland Date: Wed, 14 Jul 2021 18:28:01 +0100 Subject: [PATCH 394/714] arm64: entry: add missing noinstr We intend that all the early exception handling code is marked as `noinstr`, but we forgot this for __el0_error_handler_common(), which is called before we have completed entry from user mode. If it were instrumented, we could run into problems with RCU, lockdep, etc. Mark it as `noinstr` to prevent this. The few other functions in entry-common.c which do not have `noinstr` are called once we've completed entry, and are safe to instrument. Fixes: bb8e93a287a5 ("arm64: entry: convert SError handlers to C") Signed-off-by: Mark Rutland Cc: Catalin Marinas Cc: Marc Zyngier Cc: Joey Gouly Cc: James Morse Cc: Will Deacon Link: https://lore.kernel.org/r/20210714172801.16475-1-mark.rutland@arm.com Signed-off-by: Will Deacon --- arch/arm64/kernel/entry-common.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/arm64/kernel/entry-common.c b/arch/arm64/kernel/entry-common.c index 12ce14a98b7c4f..db8b2e2d02c235 100644 --- a/arch/arm64/kernel/entry-common.c +++ b/arch/arm64/kernel/entry-common.c @@ -604,7 +604,7 @@ asmlinkage void noinstr el0t_64_fiq_handler(struct pt_regs *regs) __el0_fiq_handler_common(regs); } -static void __el0_error_handler_common(struct pt_regs *regs) +static void noinstr __el0_error_handler_common(struct pt_regs *regs) { unsigned long esr = read_sysreg(esr_el1); From e6f85cbeb23bd74b8966cf1f15bf7d01399ff625 Mon Sep 17 00:00:00 2001 From: Mark Rutland Date: Thu, 15 Jul 2021 13:30:49 +0100 Subject: [PATCH 395/714] arm64: entry: fix KCOV suppression We suppress KCOV for entry.o rather than entry-common.o. As entry.o is built from entry.S, this is pointless, and permits instrumentation of entry-common.o, which is built from entry-common.c. Fix the Makefile to suppress KCOV for entry-common.o, as we had intended to begin with. I've verified with objdump that this is working as expected. Fixes: bf6fa2c0dda7 ("arm64: entry: don't instrument entry code with KCOV") Signed-off-by: Mark Rutland Cc: Catalin Marinas Cc: James Morse Cc: Marc Zyngier Cc: Will Deacon Link: https://lore.kernel.org/r/20210715123049.9990-1-mark.rutland@arm.com Signed-off-by: Will Deacon --- arch/arm64/kernel/Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/arm64/kernel/Makefile b/arch/arm64/kernel/Makefile index cce308586fcc44..3f1490bfb938a0 100644 --- a/arch/arm64/kernel/Makefile +++ b/arch/arm64/kernel/Makefile @@ -17,7 +17,7 @@ CFLAGS_syscall.o += -fno-stack-protector # It's not safe to invoke KCOV when portions of the kernel environment aren't # available or are out-of-sync with HW state. Since `noinstr` doesn't always # inhibit KCOV instrumentation, disable it for the entire compilation unit. -KCOV_INSTRUMENT_entry.o := n +KCOV_INSTRUMENT_entry-common.o := n KCOV_INSTRUMENT_idle.o := n # Object file lists. From 3ac1d426510f97ace05093ae9f2f710d9cbe6215 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Thu, 15 Jul 2021 09:58:04 -0700 Subject: [PATCH 396/714] iomap: remove the length variable in iomap_seek_data The length variable is rather pointless given that it can be trivially deduced from offset and size. Also the initial calculation can lead to KASAN warnings. Signed-off-by: Christoph Hellwig Reported-by: Leizhen (ThunderTown) Reviewed-by: Darrick J. Wong Signed-off-by: Darrick J. Wong Reviewed-by: Matthew Wilcox (Oracle) --- fs/iomap/seek.c | 16 ++++++---------- 1 file changed, 6 insertions(+), 10 deletions(-) diff --git a/fs/iomap/seek.c b/fs/iomap/seek.c index dab1b02eba5b7f..50b8f1418f2668 100644 --- a/fs/iomap/seek.c +++ b/fs/iomap/seek.c @@ -83,27 +83,23 @@ loff_t iomap_seek_data(struct inode *inode, loff_t offset, const struct iomap_ops *ops) { loff_t size = i_size_read(inode); - loff_t length = size - offset; loff_t ret; /* Nothing to be found before or beyond the end of the file. */ if (offset < 0 || offset >= size) return -ENXIO; - while (length > 0) { - ret = iomap_apply(inode, offset, length, IOMAP_REPORT, ops, - &offset, iomap_seek_data_actor); + while (offset < size) { + ret = iomap_apply(inode, offset, size - offset, IOMAP_REPORT, + ops, &offset, iomap_seek_data_actor); if (ret < 0) return ret; if (ret == 0) - break; - + return offset; offset += ret; - length -= ret; } - if (length <= 0) - return -ENXIO; - return offset; + /* We've reached the end of the file without finding data */ + return -ENXIO; } EXPORT_SYMBOL_GPL(iomap_seek_data); From 49694d14ff68fa4b5f86019dbcfb44a8bd213e58 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Thu, 15 Jul 2021 09:58:04 -0700 Subject: [PATCH 397/714] iomap: remove the length variable in iomap_seek_hole The length variable is rather pointless given that it can be trivially deduced from offset and size. Also the initial calculation can lead to KASAN warnings. Signed-off-by: Christoph Hellwig Reported-by: Leizhen (ThunderTown) Reviewed-by: Darrick J. Wong Signed-off-by: Darrick J. Wong Reviewed-by: Matthew Wilcox (Oracle) --- fs/iomap/seek.c | 9 +++------ 1 file changed, 3 insertions(+), 6 deletions(-) diff --git a/fs/iomap/seek.c b/fs/iomap/seek.c index 50b8f1418f2668..ce6fb810854fec 100644 --- a/fs/iomap/seek.c +++ b/fs/iomap/seek.c @@ -35,23 +35,20 @@ loff_t iomap_seek_hole(struct inode *inode, loff_t offset, const struct iomap_ops *ops) { loff_t size = i_size_read(inode); - loff_t length = size - offset; loff_t ret; /* Nothing to be found before or beyond the end of the file. */ if (offset < 0 || offset >= size) return -ENXIO; - while (length > 0) { - ret = iomap_apply(inode, offset, length, IOMAP_REPORT, ops, - &offset, iomap_seek_hole_actor); + while (offset < size) { + ret = iomap_apply(inode, offset, size - offset, IOMAP_REPORT, + ops, &offset, iomap_seek_hole_actor); if (ret < 0) return ret; if (ret == 0) break; - offset += ret; - length -= ret; } return offset; From 8e1bcef8e18d0fec4afe527c074bb1fd6c2b140c Mon Sep 17 00:00:00 2001 From: Andreas Gruenbacher Date: Thu, 15 Jul 2021 09:58:05 -0700 Subject: [PATCH 398/714] iomap: Permit pages without an iop to enter writeback Create an iop in the writeback path if one doesn't exist. This allows us to avoid creating the iop in some cases. We'll initially do that for pages with inline data, but it can be extended to pages which are entirely within an extent. It also allows for an iop to be removed from pages in the future (eg page split). Co-developed-by: Matthew Wilcox (Oracle) Signed-off-by: Matthew Wilcox (Oracle) Signed-off-by: Andreas Gruenbacher Reviewed-by: Christoph Hellwig Reviewed-by: Darrick J. Wong Signed-off-by: Darrick J. Wong --- fs/iomap/buffered-io.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/fs/iomap/buffered-io.c b/fs/iomap/buffered-io.c index 41da4f14c00bb5..ad976634d56b36 100644 --- a/fs/iomap/buffered-io.c +++ b/fs/iomap/buffered-io.c @@ -1304,14 +1304,13 @@ iomap_writepage_map(struct iomap_writepage_ctx *wpc, struct writeback_control *wbc, struct inode *inode, struct page *page, u64 end_offset) { - struct iomap_page *iop = to_iomap_page(page); + struct iomap_page *iop = iomap_page_create(inode, page); struct iomap_ioend *ioend, *next; unsigned len = i_blocksize(inode); u64 file_offset; /* file offset of page */ int error = 0, count = 0, i; LIST_HEAD(submit_list); - WARN_ON_ONCE(i_blocks_per_page(inode, page) > 1 && !iop); WARN_ON_ONCE(iop && atomic_read(&iop->write_bytes_pending) != 0); /* From 637d3375953e052a62c0db409557e3b3354be88a Mon Sep 17 00:00:00 2001 From: Andreas Gruenbacher Date: Thu, 15 Jul 2021 09:58:05 -0700 Subject: [PATCH 399/714] iomap: Don't create iomap_page objects for inline files In iomap_readpage_actor, don't create iop objects for inline inodes. Otherwise, iomap_read_inline_data will set PageUptodate without setting iop->uptodate, and iomap_page_release will eventually complain. To prevent this kind of bug from occurring in the future, make sure the page doesn't have private data attached in iomap_read_inline_data. Signed-off-by: Andreas Gruenbacher Reviewed-by: Christoph Hellwig Reviewed-by: Darrick J. Wong Signed-off-by: Darrick J. Wong --- fs/iomap/buffered-io.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/fs/iomap/buffered-io.c b/fs/iomap/buffered-io.c index ad976634d56b36..41076bab18cb4d 100644 --- a/fs/iomap/buffered-io.c +++ b/fs/iomap/buffered-io.c @@ -215,6 +215,7 @@ iomap_read_inline_data(struct inode *inode, struct page *page, if (PageUptodate(page)) return; + BUG_ON(page_has_private(page)); BUG_ON(page->index); BUG_ON(size > PAGE_SIZE - offset_in_page(iomap->inline_data)); @@ -239,7 +240,7 @@ iomap_readpage_actor(struct inode *inode, loff_t pos, loff_t length, void *data, { struct iomap_readpage_ctx *ctx = data; struct page *page = ctx->cur_page; - struct iomap_page *iop = iomap_page_create(inode, page); + struct iomap_page *iop; bool same_page = false, is_contig = false; loff_t orig_pos = pos; unsigned poff, plen; @@ -252,6 +253,7 @@ iomap_readpage_actor(struct inode *inode, loff_t pos, loff_t length, void *data, } /* zero post-eof blocks as the page may be mapped */ + iop = iomap_page_create(inode, page); iomap_adjust_read_range(inode, iop, &pos, length, &poff, &plen); if (plen == 0) goto done; From 229adf3c64dbeae4e2f45fb561907ada9fcc0d0c Mon Sep 17 00:00:00 2001 From: Andreas Gruenbacher Date: Thu, 15 Jul 2021 09:58:06 -0700 Subject: [PATCH 400/714] iomap: Don't create iomap_page objects in iomap_page_mkwrite_actor Now that we create those objects in iomap_writepage_map when needed, there's no need to pre-create them in iomap_page_mkwrite_actor anymore. Signed-off-by: Andreas Gruenbacher Reviewed-by: Christoph Hellwig Reviewed-by: Matthew Wilcox (Oracle) Reviewed-by: Darrick J. Wong Signed-off-by: Darrick J. Wong --- fs/iomap/buffered-io.c | 1 - 1 file changed, 1 deletion(-) diff --git a/fs/iomap/buffered-io.c b/fs/iomap/buffered-io.c index 41076bab18cb4d..87ccb3438becd9 100644 --- a/fs/iomap/buffered-io.c +++ b/fs/iomap/buffered-io.c @@ -969,7 +969,6 @@ iomap_page_mkwrite_actor(struct inode *inode, loff_t pos, loff_t length, block_commit_write(page, 0, length); } else { WARN_ON_ONCE(!PageUptodate(page)); - iomap_page_create(inode, page); set_page_dirty(page); } From da062d16a897c0759ae907e786bc0bea950c0c9d Mon Sep 17 00:00:00 2001 From: "Darrick J. Wong" Date: Mon, 12 Jul 2021 12:58:47 -0700 Subject: [PATCH 401/714] xfs: check for sparse inode clusters that cross new EOAG when shrinking While running xfs/168, I noticed occasional write verifier shutdowns involving inodes at the very end of the filesystem. Existing inode btree validation code checks that all inode clusters are fully contained within the filesystem. However, due to inadequate checking in the fs shrink code, it's possible that there could be a sparse inode cluster at the end of the filesystem where the upper inodes of the cluster are marked as holes and the corresponding blocks are free. In this case, the last blocks in the AG are listed in the bnobt. This enables the shrink to proceed but results in a filesystem that trips the inode verifiers. Fix this by disallowing the shrink. Signed-off-by: Darrick J. Wong Reviewed-by: Gao Xiang --- fs/xfs/libxfs/xfs_ag.c | 8 ++++++ fs/xfs/libxfs/xfs_ialloc.c | 55 ++++++++++++++++++++++++++++++++++++++ fs/xfs/libxfs/xfs_ialloc.h | 3 +++ 3 files changed, 66 insertions(+) diff --git a/fs/xfs/libxfs/xfs_ag.c b/fs/xfs/libxfs/xfs_ag.c index 778ec52cce70f3..ee9ec0c50bec03 100644 --- a/fs/xfs/libxfs/xfs_ag.c +++ b/fs/xfs/libxfs/xfs_ag.c @@ -803,6 +803,14 @@ xfs_ag_shrink_space( args.fsbno = XFS_AGB_TO_FSB(mp, agno, aglen - delta); + /* + * Make sure that the last inode cluster cannot overlap with the new + * end of the AG, even if it's sparse. + */ + error = xfs_ialloc_check_shrink(*tpp, agno, agibp, aglen - delta); + if (error) + return error; + /* * Disable perag reservations so it doesn't cause the allocation request * to fail. We'll reestablish reservation before we return. diff --git a/fs/xfs/libxfs/xfs_ialloc.c b/fs/xfs/libxfs/xfs_ialloc.c index 57d9cb63298383..aaf8805a82df08 100644 --- a/fs/xfs/libxfs/xfs_ialloc.c +++ b/fs/xfs/libxfs/xfs_ialloc.c @@ -2928,3 +2928,58 @@ xfs_ialloc_calc_rootino( return XFS_AGINO_TO_INO(mp, 0, XFS_AGB_TO_AGINO(mp, first_bno)); } + +/* + * Ensure there are not sparse inode clusters that cross the new EOAG. + * + * This is a no-op for non-spinode filesystems since clusters are always fully + * allocated and checking the bnobt suffices. However, a spinode filesystem + * could have a record where the upper inodes are free blocks. If those blocks + * were removed from the filesystem, the inode record would extend beyond EOAG, + * which will be flagged as corruption. + */ +int +xfs_ialloc_check_shrink( + struct xfs_trans *tp, + xfs_agnumber_t agno, + struct xfs_buf *agibp, + xfs_agblock_t new_length) +{ + struct xfs_inobt_rec_incore rec; + struct xfs_btree_cur *cur; + struct xfs_mount *mp = tp->t_mountp; + struct xfs_perag *pag; + xfs_agino_t agino = XFS_AGB_TO_AGINO(mp, new_length); + int has; + int error; + + if (!xfs_sb_version_hassparseinodes(&mp->m_sb)) + return 0; + + pag = xfs_perag_get(mp, agno); + cur = xfs_inobt_init_cursor(mp, tp, agibp, pag, XFS_BTNUM_INO); + + /* Look up the inobt record that would correspond to the new EOFS. */ + error = xfs_inobt_lookup(cur, agino, XFS_LOOKUP_LE, &has); + if (error || !has) + goto out; + + error = xfs_inobt_get_rec(cur, &rec, &has); + if (error) + goto out; + + if (!has) { + error = -EFSCORRUPTED; + goto out; + } + + /* If the record covers inodes that would be beyond EOFS, bail out. */ + if (rec.ir_startino + XFS_INODES_PER_CHUNK > agino) { + error = -ENOSPC; + goto out; + } +out: + xfs_btree_del_cursor(cur, error); + xfs_perag_put(pag); + return error; +} diff --git a/fs/xfs/libxfs/xfs_ialloc.h b/fs/xfs/libxfs/xfs_ialloc.h index 9df7c80408ffc5..9a2112b4ad5eee 100644 --- a/fs/xfs/libxfs/xfs_ialloc.h +++ b/fs/xfs/libxfs/xfs_ialloc.h @@ -122,4 +122,7 @@ int xfs_ialloc_cluster_alignment(struct xfs_mount *mp); void xfs_ialloc_setup_geometry(struct xfs_mount *mp); xfs_ino_t xfs_ialloc_calc_rootino(struct xfs_mount *mp, int sunit); +int xfs_ialloc_check_shrink(struct xfs_trans *tp, xfs_agnumber_t agno, + struct xfs_buf *agibp, xfs_agblock_t new_length); + #endif /* __XFS_IALLOC_H__ */ From 5838d0356bb3c320867c393f12b169c01a870bda Mon Sep 17 00:00:00 2001 From: "Darrick J. Wong" Date: Mon, 12 Jul 2021 12:58:48 -0700 Subject: [PATCH 402/714] xfs: reset child dir '..' entry when unlinking child While running xfs/168, I noticed a second source of post-shrink corruption errors causing shutdowns. Let's say that directory B has a low inode number and is a child of directory A, which has a high number. If B is empty but open, and unlinked from A, B's dotdot link continues to point to A. If A is then unlinked and the filesystem shrunk so that A is no longer a valid inode, a subsequent AIL push of B will trip the inode verifiers because the dotdot entry points outside of the filesystem. To avoid this problem, reset B's dotdot entry to the root directory when unlinking directories, since the root directory cannot be removed. Signed-off-by: Darrick J. Wong Reviewed-by: Gao Xiang --- fs/xfs/xfs_inode.c | 13 +++++++++++++ 1 file changed, 13 insertions(+) diff --git a/fs/xfs/xfs_inode.c b/fs/xfs/xfs_inode.c index a835ceb79ba55a..990b72ae36350a 100644 --- a/fs/xfs/xfs_inode.c +++ b/fs/xfs/xfs_inode.c @@ -2763,6 +2763,19 @@ xfs_remove( error = xfs_droplink(tp, ip); if (error) goto out_trans_cancel; + + /* + * Point the unlinked child directory's ".." entry to the root + * directory to eliminate back-references to inodes that may + * get freed before the child directory is closed. If the fs + * gets shrunk, this can lead to dirent inode validation errors. + */ + if (dp->i_ino != tp->t_mountp->m_sb.sb_rootino) { + error = xfs_dir_replace(tp, ip, &xfs_name_dotdot, + tp->t_mountp->m_sb.sb_rootino, 0); + if (error) + return error; + } } else { /* * When removing a non-directory we need to log the parent From 83193e5ebb0164d612aa620ceab7d3746e80e2a4 Mon Sep 17 00:00:00 2001 From: "Darrick J. Wong" Date: Mon, 12 Jul 2021 12:58:50 -0700 Subject: [PATCH 403/714] xfs: correct the narrative around misaligned rtinherit/extszinherit dirs While auditing the realtime growfs code, I realized that the GROWFSRT ioctl (and by extension xfs_growfs) has always allowed sysadmins to change the realtime extent size when adding a realtime section to the filesystem. Since we also have always allowed sysadmins to set RTINHERIT and EXTSZINHERIT on directories even if there is no realtime device, this invalidates the premise laid out in the comments added in commit 603f000b15f2. In other words, this is not a case of inadequate metadata validation. This is a case of nearly forgotten (and apparently untested) but supported functionality. Update the comments to reflect what we've learned, and remove the log message about correcting the misalignment. Fixes: 603f000b15f2 ("xfs: validate extsz hints against rt extent size when rtinherit is set") Signed-off-by: Darrick J. Wong Reviewed-by: Carlos Maiolino Reviewed-by: Christoph Hellwig --- fs/xfs/libxfs/xfs_inode_buf.c | 28 ++++++++++++++++------------ fs/xfs/libxfs/xfs_trans_inode.c | 10 ++++------ fs/xfs/xfs_ioctl.c | 8 ++++---- 3 files changed, 24 insertions(+), 22 deletions(-) diff --git a/fs/xfs/libxfs/xfs_inode_buf.c b/fs/xfs/libxfs/xfs_inode_buf.c index 04ce361688f777..84ea2e0af9f026 100644 --- a/fs/xfs/libxfs/xfs_inode_buf.c +++ b/fs/xfs/libxfs/xfs_inode_buf.c @@ -592,23 +592,27 @@ xfs_inode_validate_extsize( /* * This comment describes a historic gap in this verifier function. * - * On older kernels, the extent size hint verifier doesn't check that - * the extent size hint is an integer multiple of the realtime extent - * size on a directory with both RTINHERIT and EXTSZINHERIT flags set. - * The verifier has always enforced the alignment rule for regular - * files with the REALTIME flag set. + * For a directory with both RTINHERIT and EXTSZINHERIT flags set, this + * function has never checked that the extent size hint is an integer + * multiple of the realtime extent size. Since we allow users to set + * this combination on non-rt filesystems /and/ to change the rt + * extent size when adding a rt device to a filesystem, the net effect + * is that users can configure a filesystem anticipating one rt + * geometry and change their minds later. Directories do not use the + * extent size hint, so this is harmless for them. * * If a directory with a misaligned extent size hint is allowed to * propagate that hint into a new regular realtime file, the result * is that the inode cluster buffer verifier will trigger a corruption - * shutdown the next time it is run. + * shutdown the next time it is run, because the verifier has always + * enforced the alignment rule for regular files. * - * Unfortunately, there could be filesystems with these misconfigured - * directories in the wild, so we cannot add a check to this verifier - * at this time because that will result a new source of directory - * corruption errors when reading an existing filesystem. Instead, we - * permit the misconfiguration to pass through the verifiers so that - * callers of this function can correct and mitigate externally. + * Because we allow administrators to set a new rt extent size when + * adding a rt section, we cannot add a check to this verifier because + * that will result a new source of directory corruption errors when + * reading an existing filesystem. Instead, we rely on callers to + * decide when alignment checks are appropriate, and fix things up as + * needed. */ if (rt_flag) diff --git a/fs/xfs/libxfs/xfs_trans_inode.c b/fs/xfs/libxfs/xfs_trans_inode.c index 8d595a5c4abd1d..16f723ebe8dda9 100644 --- a/fs/xfs/libxfs/xfs_trans_inode.c +++ b/fs/xfs/libxfs/xfs_trans_inode.c @@ -143,16 +143,14 @@ xfs_trans_log_inode( } /* - * Inode verifiers on older kernels don't check that the extent size - * hint is an integer multiple of the rt extent size on a directory - * with both rtinherit and extszinherit flags set. If we're logging a - * directory that is misconfigured in this way, clear the hint. + * Inode verifiers do not check that the extent size hint is an integer + * multiple of the rt extent size on a directory with both rtinherit + * and extszinherit flags set. If we're logging a directory that is + * misconfigured in this way, clear the hint. */ if ((ip->i_diflags & XFS_DIFLAG_RTINHERIT) && (ip->i_diflags & XFS_DIFLAG_EXTSZINHERIT) && (ip->i_extsize % ip->i_mount->m_sb.sb_rextsize) > 0) { - xfs_info_once(ip->i_mount, - "Correcting misaligned extent size hint in inode 0x%llx.", ip->i_ino); ip->i_diflags &= ~(XFS_DIFLAG_EXTSIZE | XFS_DIFLAG_EXTSZINHERIT); ip->i_extsize = 0; diff --git a/fs/xfs/xfs_ioctl.c b/fs/xfs/xfs_ioctl.c index 65270e63c032a0..275d1c3123bdad 100644 --- a/fs/xfs/xfs_ioctl.c +++ b/fs/xfs/xfs_ioctl.c @@ -1292,10 +1292,10 @@ xfs_ioctl_setattr_check_extsize( new_diflags = xfs_flags2diflags(ip, fa->fsx_xflags); /* - * Inode verifiers on older kernels don't check that the extent size - * hint is an integer multiple of the rt extent size on a directory - * with both rtinherit and extszinherit flags set. Don't let sysadmins - * misconfigure directories. + * Inode verifiers do not check that the extent size hint is an integer + * multiple of the rt extent size on a directory with both rtinherit + * and extszinherit flags set. Don't let sysadmins misconfigure + * directories. */ if ((new_diflags & XFS_DIFLAG_RTINHERIT) && (new_diflags & XFS_DIFLAG_EXTSZINHERIT)) { From 5aa5b278237f356f86205c4b03d4cc64a293850a Mon Sep 17 00:00:00 2001 From: "Darrick J. Wong" Date: Mon, 12 Jul 2021 12:58:51 -0700 Subject: [PATCH 404/714] xfs: don't expose misaligned extszinherit hints to userspace Commit 603f000b15f2 changed xfs_ioctl_setattr_check_extsize to reject an attempt to set an EXTSZINHERIT extent size hint on a directory with RTINHERIT set if the hint isn't a multiple of the realtime extent size. However, I have recently discovered that it is possible to change the realtime extent size when adding a rt device to a filesystem, which means that the existence of directories with misaligned inherited hints is not an accident. As a result, it's possible that someone could have set a valid hint and added an rt volume with a different rt extent size, which invalidates the ondisk hints. After such a sequence, FSGETXATTR will report a misaligned hint, which FSSETXATTR will trip over, causing confusion if the user was doing the usual GET/SET sequence to change some other attribute. Change xfs_fill_fsxattr to omit the hint if it isn't aligned properly. Fixes: 603f000b15f2 ("xfs: validate extsz hints against rt extent size when rtinherit is set") Signed-off-by: Darrick J. Wong Reviewed-by: Christoph Hellwig --- fs/xfs/xfs_ioctl.c | 19 ++++++++++++++++++- 1 file changed, 18 insertions(+), 1 deletion(-) diff --git a/fs/xfs/xfs_ioctl.c b/fs/xfs/xfs_ioctl.c index 275d1c3123bdad..16039ea10ac99c 100644 --- a/fs/xfs/xfs_ioctl.c +++ b/fs/xfs/xfs_ioctl.c @@ -1065,7 +1065,24 @@ xfs_fill_fsxattr( fileattr_fill_xflags(fa, xfs_ip2xflags(ip)); - fa->fsx_extsize = XFS_FSB_TO_B(mp, ip->i_extsize); + if (ip->i_diflags & XFS_DIFLAG_EXTSIZE) { + fa->fsx_extsize = XFS_FSB_TO_B(mp, ip->i_extsize); + } else if (ip->i_diflags & XFS_DIFLAG_EXTSZINHERIT) { + /* + * Don't let a misaligned extent size hint on a directory + * escape to userspace if it won't pass the setattr checks + * later. + */ + if ((ip->i_diflags & XFS_DIFLAG_RTINHERIT) && + ip->i_extsize % mp->m_sb.sb_rextsize > 0) { + fa->fsx_xflags &= ~(FS_XFLAG_EXTSIZE | + FS_XFLAG_EXTSZINHERIT); + fa->fsx_extsize = 0; + } else { + fa->fsx_extsize = XFS_FSB_TO_B(mp, ip->i_extsize); + } + } + if (ip->i_diflags2 & XFS_DIFLAG2_COWEXTSIZE) fa->fsx_cowextsize = XFS_FSB_TO_B(mp, ip->i_cowextsize); fa->fsx_projid = ip->i_projid; From 0e2af9296f4f9c4c815ced2beb21093af7c38644 Mon Sep 17 00:00:00 2001 From: "Darrick J. Wong" Date: Mon, 12 Jul 2021 12:58:48 -0700 Subject: [PATCH 405/714] xfs: improve FSGROWFSRT precondition checking Improve the checking at the start of a realtime grow operation so that we avoid accidentally set a new extent size that is too large and avoid adding an rt volume to a filesystem with rmap or reflink because we don't support rt rmap or reflink yet. While we're at it, separate the checks so that we're only testing one aspect at a time. Signed-off-by: Darrick J. Wong Reviewed-by: Dave Chinner Reviewed-by: Christoph Hellwig --- fs/xfs/xfs_rtalloc.c | 39 ++++++++++++++++++++++++++++++++------- 1 file changed, 32 insertions(+), 7 deletions(-) diff --git a/fs/xfs/xfs_rtalloc.c b/fs/xfs/xfs_rtalloc.c index 4e7be6b4ca8e85..8f6a05db44686e 100644 --- a/fs/xfs/xfs_rtalloc.c +++ b/fs/xfs/xfs_rtalloc.c @@ -923,16 +923,41 @@ xfs_growfs_rt( uint8_t *rsum_cache; /* old summary cache */ sbp = &mp->m_sb; - /* - * Initial error checking. - */ + if (!capable(CAP_SYS_ADMIN)) return -EPERM; - if (mp->m_rtdev_targp == NULL || mp->m_rbmip == NULL || - (nrblocks = in->newblocks) <= sbp->sb_rblocks || - (sbp->sb_rblocks && (in->extsize != sbp->sb_rextsize))) + + /* Needs to have been mounted with an rt device. */ + if (!XFS_IS_REALTIME_MOUNT(mp)) + return -EINVAL; + /* + * Mount should fail if the rt bitmap/summary files don't load, but + * we'll check anyway. + */ + if (!mp->m_rbmip || !mp->m_rsumip) + return -EINVAL; + + /* Shrink not supported. */ + if (in->newblocks <= sbp->sb_rblocks) + return -EINVAL; + + /* Can only change rt extent size when adding rt volume. */ + if (sbp->sb_rblocks > 0 && in->extsize != sbp->sb_rextsize) + return -EINVAL; + + /* Range check the extent size. */ + if (XFS_FSB_TO_B(mp, in->extsize) > XFS_MAX_RTEXTSIZE || + XFS_FSB_TO_B(mp, in->extsize) < XFS_MIN_RTEXTSIZE) return -EINVAL; - if ((error = xfs_sb_validate_fsb_count(sbp, nrblocks))) + + /* Unsupported realtime features. */ + if (xfs_sb_version_hasrmapbt(&mp->m_sb) || + xfs_sb_version_hasreflink(&mp->m_sb)) + return -EOPNOTSUPP; + + nrblocks = in->newblocks; + error = xfs_sb_validate_fsb_count(sbp, nrblocks); + if (error) return error; /* * Read in the last block of the device, make sure it exists. From 0925fecc557471b6f6a488c3590a275151210572 Mon Sep 17 00:00:00 2001 From: "Darrick J. Wong" Date: Mon, 12 Jul 2021 12:58:49 -0700 Subject: [PATCH 406/714] xfs: fix an integer overflow error in xfs_growfs_rt During a realtime grow operation, we run a single transaction for each rt bitmap block added to the filesystem. This means that each step has to be careful to increase sb_rblocks appropriately. Fix the integer overflow error in this calculation that can happen when the extent size is very large. Found by running growfs to add a rt volume to a filesystem formatted with a 1g rt extent size. Signed-off-by: Darrick J. Wong Reviewed-by: Dave Chinner Reviewed-by: Christoph Hellwig --- fs/xfs/xfs_rtalloc.c | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/fs/xfs/xfs_rtalloc.c b/fs/xfs/xfs_rtalloc.c index 8f6a05db44686e..699066fb9052d3 100644 --- a/fs/xfs/xfs_rtalloc.c +++ b/fs/xfs/xfs_rtalloc.c @@ -1021,7 +1021,8 @@ xfs_growfs_rt( ((sbp->sb_rextents & ((1 << mp->m_blkbit_log) - 1)) != 0); bmbno < nrbmblocks; bmbno++) { - xfs_trans_t *tp; + struct xfs_trans *tp; + xfs_rfsblock_t nrblocks_step; *nmp = *mp; nsbp = &nmp->m_sb; @@ -1030,10 +1031,9 @@ xfs_growfs_rt( */ nsbp->sb_rextsize = in->extsize; nsbp->sb_rbmblocks = bmbno + 1; - nsbp->sb_rblocks = - XFS_RTMIN(nrblocks, - nsbp->sb_rbmblocks * NBBY * - nsbp->sb_blocksize * nsbp->sb_rextsize); + nrblocks_step = (bmbno + 1) * NBBY * nsbp->sb_blocksize * + nsbp->sb_rextsize; + nsbp->sb_rblocks = min(nrblocks, nrblocks_step); nsbp->sb_rextents = nsbp->sb_rblocks; do_div(nsbp->sb_rextents, nsbp->sb_rextsize); ASSERT(nsbp->sb_rextents != 0); From b102a46ce16fd5550aed882c3c5b95f50da7992c Mon Sep 17 00:00:00 2001 From: "Darrick J. Wong" Date: Wed, 14 Jul 2021 09:03:41 -0700 Subject: [PATCH 407/714] xfs: detect misaligned rtinherit directory extent size hints If we encounter a directory that has been configured to pass on an extent size hint to a new realtime file and the hint isn't an integer multiple of the rt extent size, we should flag the hint for administrative review because that is a misconfiguration (that other parts of the kernel will fix automatically). Signed-off-by: Darrick J. Wong Reviewed-by: Dave Chinner Reviewed-by: Christoph Hellwig --- fs/xfs/scrub/inode.c | 18 ++++++++++++++++-- 1 file changed, 16 insertions(+), 2 deletions(-) diff --git a/fs/xfs/scrub/inode.c b/fs/xfs/scrub/inode.c index 61f90b2c943057..76fbc7ca4cec46 100644 --- a/fs/xfs/scrub/inode.c +++ b/fs/xfs/scrub/inode.c @@ -73,11 +73,25 @@ xchk_inode_extsize( uint16_t flags) { xfs_failaddr_t fa; + uint32_t value = be32_to_cpu(dip->di_extsize); - fa = xfs_inode_validate_extsize(sc->mp, be32_to_cpu(dip->di_extsize), - mode, flags); + fa = xfs_inode_validate_extsize(sc->mp, value, mode, flags); if (fa) xchk_ino_set_corrupt(sc, ino); + + /* + * XFS allows a sysadmin to change the rt extent size when adding a rt + * section to a filesystem after formatting. If there are any + * directories with extszinherit and rtinherit set, the hint could + * become misaligned with the new rextsize. The verifier doesn't check + * this, because we allow rtinherit directories even without an rt + * device. Flag this as an administrative warning since we will clean + * this up eventually. + */ + if ((flags & XFS_DIFLAG_RTINHERIT) && + (flags & XFS_DIFLAG_EXTSZINHERIT) && + value % sc->mp->m_sb.sb_rextsize > 0) + xchk_ino_set_warning(sc, ino); } /* From 99bb2ebab953435852340cdb198c5abbf0bb5dd3 Mon Sep 17 00:00:00 2001 From: Geert Uytterhoeven Date: Wed, 14 Jul 2021 11:58:12 +0200 Subject: [PATCH 408/714] net: dsa: mv88e6xxx: NET_DSA_MV88E6XXX_PTP should depend on NET_DSA_MV88E6XXX Making global2 support mandatory removed the Kconfig symbol NET_DSA_MV88E6XXX_GLOBAL2. This symbol also served as an intermediate symbol to make NET_DSA_MV88E6XXX_PTP depend on NET_DSA_MV88E6XXX. With the symbol removed, the user is always asked about PTP support for Marvell 88E6xxx switches, even if the latter support is not enabled. Fix this by reinstating the dependency. Fixes: 63368a7416df144b ("net: dsa: mv88e6xxx: Make global2 support mandatory") Signed-off-by: Geert Uytterhoeven Reviewed-by: Andrew Lunn Reviewed-by: Vladimir Oltean Signed-off-by: David S. Miller --- drivers/net/dsa/mv88e6xxx/Kconfig | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/dsa/mv88e6xxx/Kconfig b/drivers/net/dsa/mv88e6xxx/Kconfig index 05af632b0f597d..634a48e6616b95 100644 --- a/drivers/net/dsa/mv88e6xxx/Kconfig +++ b/drivers/net/dsa/mv88e6xxx/Kconfig @@ -12,7 +12,7 @@ config NET_DSA_MV88E6XXX config NET_DSA_MV88E6XXX_PTP bool "PTP support for Marvell 88E6xxx" default n - depends on PTP_1588_CLOCK + depends on NET_DSA_MV88E6XXX && PTP_1588_CLOCK help Say Y to enable PTP hardware timestamping on Marvell 88E6xxx switch chips that support it. From 0d4a062af2cea33c2000b28420e8e2eb58b4fd0b Mon Sep 17 00:00:00 2001 From: Marco Elver Date: Wed, 14 Jul 2021 21:26:34 -0700 Subject: [PATCH 409/714] mm: move helper to check slub_debug_enabled Move the helper to check slub_debug_enabled, so that we can confine the use of #ifdef outside slub.c as well. Link: https://lkml.kernel.org/r/20210705103229.8505-2-yee.lee@mediatek.com Signed-off-by: Marco Elver Signed-off-by: Yee Lee Suggested-by: Matthew Wilcox Cc: Alexander Potapenko Cc: Andrey Konovalov Cc: Andrey Ryabinin Cc: Chinwen Chang Cc: Dmitry Vyukov Cc: Kuan-Ying Lee Cc: Nicholas Tang Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- mm/slab.h | 15 +++++++++++---- mm/slub.c | 14 -------------- 2 files changed, 11 insertions(+), 18 deletions(-) diff --git a/mm/slab.h b/mm/slab.h index 67e06637ff2eed..f997fd5e42c863 100644 --- a/mm/slab.h +++ b/mm/slab.h @@ -216,10 +216,18 @@ DECLARE_STATIC_KEY_FALSE(slub_debug_enabled); #endif extern void print_tracking(struct kmem_cache *s, void *object); long validate_slab_cache(struct kmem_cache *s); +static inline bool __slub_debug_enabled(void) +{ + return static_branch_unlikely(&slub_debug_enabled); +} #else static inline void print_tracking(struct kmem_cache *s, void *object) { } +static inline bool __slub_debug_enabled(void) +{ + return false; +} #endif /* @@ -229,11 +237,10 @@ static inline void print_tracking(struct kmem_cache *s, void *object) */ static inline bool kmem_cache_debug_flags(struct kmem_cache *s, slab_flags_t flags) { -#ifdef CONFIG_SLUB_DEBUG - VM_WARN_ON_ONCE(!(flags & SLAB_DEBUG_FLAGS)); - if (static_branch_unlikely(&slub_debug_enabled)) + if (IS_ENABLED(CONFIG_SLUB_DEBUG)) + VM_WARN_ON_ONCE(!(flags & SLAB_DEBUG_FLAGS)); + if (__slub_debug_enabled()) return s->flags & flags; -#endif return false; } diff --git a/mm/slub.c b/mm/slub.c index dc863c1ea3243e..e1644ac6ee7bf7 100644 --- a/mm/slub.c +++ b/mm/slub.c @@ -120,25 +120,11 @@ */ #ifdef CONFIG_SLUB_DEBUG - #ifdef CONFIG_SLUB_DEBUG_ON DEFINE_STATIC_KEY_TRUE(slub_debug_enabled); #else DEFINE_STATIC_KEY_FALSE(slub_debug_enabled); #endif - -static inline bool __slub_debug_enabled(void) -{ - return static_branch_unlikely(&slub_debug_enabled); -} - -#else /* CONFIG_SLUB_DEBUG */ - -static inline bool __slub_debug_enabled(void) -{ - return false; -} - #endif /* CONFIG_SLUB_DEBUG */ static inline bool kmem_cache_debug(struct kmem_cache *s) From 77a63c69ec43f4dc28f4b2d1c933c39e55de6ad8 Mon Sep 17 00:00:00 2001 From: Yee Lee Date: Wed, 14 Jul 2021 21:26:37 -0700 Subject: [PATCH 410/714] kasan: add memzero init for unaligned size at DEBUG Issue: when SLUB debug is on, hwtag kasan_unpoison() would overwrite the redzone of object with unaligned size. An additional memzero_explicit() path is added to replacing init by hwtag instruction for those unaligned size at SLUB debug mode. The penalty is acceptable since they are only enabled in debug mode, not production builds. A block of comment is added for explanation. Link: https://lkml.kernel.org/r/20210705103229.8505-3-yee.lee@mediatek.com Signed-off-by: Yee Lee Suggested-by: Andrey Konovalov Suggested-by: Marco Elver Reviewed-by: Marco Elver Reviewed-by: Andrey Konovalov Cc: Andrey Ryabinin Cc: Alexander Potapenko Cc: Dmitry Vyukov Cc: Nicholas Tang Cc: Kuan-Ying Lee Cc: Chinwen Chang Cc: Matthew Wilcox Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- mm/kasan/kasan.h | 12 ++++++++++++ 1 file changed, 12 insertions(+) diff --git a/mm/kasan/kasan.h b/mm/kasan/kasan.h index 98e3059bfea455..d739cdd1621ab4 100644 --- a/mm/kasan/kasan.h +++ b/mm/kasan/kasan.h @@ -9,6 +9,7 @@ #ifdef CONFIG_KASAN_HW_TAGS #include +#include "../slab.h" DECLARE_STATIC_KEY_FALSE(kasan_flag_stacktrace); extern bool kasan_flag_async __ro_after_init; @@ -387,6 +388,17 @@ static inline void kasan_unpoison(const void *addr, size_t size, bool init) if (WARN_ON((unsigned long)addr & KASAN_GRANULE_MASK)) return; + /* + * Explicitly initialize the memory with the precise object size to + * avoid overwriting the SLAB redzone. This disables initialization in + * the arch code and may thus lead to performance penalty. The penalty + * is accepted since SLAB redzones aren't enabled in production builds. + */ + if (__slub_debug_enabled() && + init && ((unsigned long)size & KASAN_GRANULE_MASK)) { + init = false; + memzero_explicit((void *)addr, size); + } size = round_up(size, KASAN_GRANULE_SIZE); hw_set_mem_tag_range((void *)addr, size, tag, init); From 2db710cc846d3321a4dc0977fa13769bddba2351 Mon Sep 17 00:00:00 2001 From: Marco Elver Date: Wed, 14 Jul 2021 21:26:40 -0700 Subject: [PATCH 411/714] kasan: fix build by including kernel.h The header relies on _RET_IP_ being defined, and had been receiving that definition via inclusion of bug.h which includes kernel.h. However, since f39650de687e ("kernel.h: split out panic and oops helpers") that is no longer the case and get the following build error when building CONFIG_KASAN_HW_TAGS on arm64: In file included from arch/arm64/mm/kasan_init.c:10: include/linux/kasan.h: In function 'kasan_slab_free': include/linux/kasan.h:230:39: error: '_RET_IP_' undeclared (first use in this function) 230 | return __kasan_slab_free(s, object, _RET_IP_, init); Fix it by including kernel.h from kasan.h. Link: https://lkml.kernel.org/r/20210705072716.2125074-1-elver@google.com Fixes: f39650de687e ("kernel.h: split out panic and oops helpers") Signed-off-by: Marco Elver Reviewed-by: Andy Shevchenko Reviewed-by: Andrey Konovalov Cc: Alexander Potapenko Cc: Dmitry Vyukov Cc: Peter Collingbourne Cc: Catalin Marinas Cc: Vincenzo Frascino Cc: Andrey Ryabinin Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/kasan.h | 1 + 1 file changed, 1 insertion(+) diff --git a/include/linux/kasan.h b/include/linux/kasan.h index 5310e217bd747c..dd874a1ee862a3 100644 --- a/include/linux/kasan.h +++ b/include/linux/kasan.h @@ -3,6 +3,7 @@ #define _LINUX_KASAN_H #include +#include #include #include From 54aa386661fef92b5f092d7068bc6d4952b91a71 Mon Sep 17 00:00:00 2001 From: Matteo Croce Date: Wed, 14 Jul 2021 21:26:43 -0700 Subject: [PATCH 412/714] Revert "mm/page_alloc: make should_fail_alloc_page() static" This reverts commit f7173090033c70886d925995e9dfdfb76dbb2441. Fix an unresolved symbol error when CONFIG_DEBUG_INFO_BTF=y: LD vmlinux BTFIDS vmlinux FAILED unresolved symbol should_fail_alloc_page make: *** [Makefile:1199: vmlinux] Error 255 make: *** Deleting file 'vmlinux' Link: https://lkml.kernel.org/r/20210708191128.153796-1-mcroce@linux.microsoft.com Fixes: f7173090033c ("mm/page_alloc: make should_fail_alloc_page() static") Signed-off-by: Matteo Croce Acked-by: Mel Gorman Tested-by: John Hubbard Cc: Michal Hocko Cc: David Hildenbrand Cc: Vlastimil Babka Cc: Dan Streetman Cc: Yang Shi Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- mm/page_alloc.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/mm/page_alloc.c b/mm/page_alloc.c index 3b97e17806be7f..bf3117bb1e162e 100644 --- a/mm/page_alloc.c +++ b/mm/page_alloc.c @@ -3820,7 +3820,7 @@ static inline bool __should_fail_alloc_page(gfp_t gfp_mask, unsigned int order) #endif /* CONFIG_FAIL_PAGE_ALLOC */ -static noinline bool should_fail_alloc_page(gfp_t gfp_mask, unsigned int order) +noinline bool should_fail_alloc_page(gfp_t gfp_mask, unsigned int order) { return __should_fail_alloc_page(gfp_mask, order); } From 187ad460b8413e863c951998cb321a117a717868 Mon Sep 17 00:00:00 2001 From: Mel Gorman Date: Wed, 14 Jul 2021 21:26:46 -0700 Subject: [PATCH 413/714] mm/page_alloc: avoid page allocator recursion with pagesets.lock held Syzbot is reporting potential deadlocks due to pagesets.lock when PAGE_OWNER is enabled. One example from Desmond Cheong Zhi Xi is as follows __alloc_pages_bulk() local_lock_irqsave(&pagesets.lock, flags) <---- outer lock here prep_new_page(): post_alloc_hook(): set_page_owner(): __set_page_owner(): save_stack(): stack_depot_save(): alloc_pages(): alloc_page_interleave(): __alloc_pages(): get_page_from_freelist(): rm_queue(): rm_queue_pcplist(): local_lock_irqsave(&pagesets.lock, flags); *** DEADLOCK *** Zhang, Qiang also reported BUG: sleeping function called from invalid context at mm/page_alloc.c:5179 in_atomic(): 0, irqs_disabled(): 1, non_block: 0, pid: 1, name: swapper/0 ..... __dump_stack lib/dump_stack.c:79 [inline] dump_stack_lvl+0xcd/0x134 lib/dump_stack.c:96 ___might_sleep.cold+0x1f1/0x237 kernel/sched/core.c:9153 prepare_alloc_pages+0x3da/0x580 mm/page_alloc.c:5179 __alloc_pages+0x12f/0x500 mm/page_alloc.c:5375 alloc_page_interleave+0x1e/0x200 mm/mempolicy.c:2147 alloc_pages+0x238/0x2a0 mm/mempolicy.c:2270 stack_depot_save+0x39d/0x4e0 lib/stackdepot.c:303 save_stack+0x15e/0x1e0 mm/page_owner.c:120 __set_page_owner+0x50/0x290 mm/page_owner.c:181 prep_new_page mm/page_alloc.c:2445 [inline] __alloc_pages_bulk+0x8b9/0x1870 mm/page_alloc.c:5313 alloc_pages_bulk_array_node include/linux/gfp.h:557 [inline] vm_area_alloc_pages mm/vmalloc.c:2775 [inline] __vmalloc_area_node mm/vmalloc.c:2845 [inline] __vmalloc_node_range+0x39d/0x960 mm/vmalloc.c:2947 __vmalloc_node mm/vmalloc.c:2996 [inline] vzalloc+0x67/0x80 mm/vmalloc.c:3066 There are a number of ways it could be fixed. The page owner code could be audited to strip GFP flags that allow sleeping but it'll impair the functionality of PAGE_OWNER if allocations fail. The bulk allocator could add a special case to release/reacquire the lock for prep_new_page and lookup PCP after the lock is reacquired at the cost of performance. The pages requiring prep could be tracked using the least significant bit and looping through the array although it is more complicated for the list interface. The options are relatively complex and the second one still incurs a performance penalty when PAGE_OWNER is active so this patch takes the simple approach -- disable bulk allocation of PAGE_OWNER is active. The caller will be forced to allocate one page at a time incurring a performance penalty but PAGE_OWNER is already a performance penalty. Link: https://lkml.kernel.org/r/20210708081434.GV3840@techsingularity.net Fixes: dbbee9d5cd83 ("mm/page_alloc: convert per-cpu list protection to local_lock") Signed-off-by: Mel Gorman Reported-by: Desmond Cheong Zhi Xi Reported-by: "Zhang, Qiang" Reported-by: syzbot+127fd7828d6eeb611703@syzkaller.appspotmail.com Tested-by: syzbot+127fd7828d6eeb611703@syzkaller.appspotmail.com Acked-by: Rafael Aquini Cc: Shuah Khan Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- mm/page_alloc.c | 12 ++++++++++++ 1 file changed, 12 insertions(+) diff --git a/mm/page_alloc.c b/mm/page_alloc.c index bf3117bb1e162e..84609716bab56a 100644 --- a/mm/page_alloc.c +++ b/mm/page_alloc.c @@ -5239,6 +5239,18 @@ unsigned long __alloc_pages_bulk(gfp_t gfp, int preferred_nid, if (nr_pages - nr_populated == 1) goto failed; +#ifdef CONFIG_PAGE_OWNER + /* + * PAGE_OWNER may recurse into the allocator to allocate space to + * save the stack with pagesets.lock held. Releasing/reacquiring + * removes much of the performance benefit of bulk allocation so + * force the caller to allocate one page at a time as it'll have + * similar performance to added complexity to the bulk allocator. + */ + if (static_branch_unlikely(&page_owner_inited)) + goto failed; +#endif + /* May set ALLOC_NOFRAGMENT, fragmentation will return 1 page. */ gfp &= gfp_allowed_mask; alloc_gfp = gfp; From e5c15cea339115edf99dc92282865f173cf84510 Mon Sep 17 00:00:00 2001 From: Yanfei Xu Date: Wed, 14 Jul 2021 21:26:49 -0700 Subject: [PATCH 414/714] mm/page_alloc: correct return value when failing at preparing If the array passed in is already partially populated, we should return "nr_populated" even failing at preparing arguments stage. Link: https://lkml.kernel.org/r/20210713152100.10381-3-mgorman@techsingularity.net Signed-off-by: Yanfei Xu Signed-off-by: Mel Gorman Link: https://lore.kernel.org/r/20210709102855.55058-1-yanfei.xu@windriver.com Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- mm/page_alloc.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/mm/page_alloc.c b/mm/page_alloc.c index 84609716bab56a..4d79e357a4bab7 100644 --- a/mm/page_alloc.c +++ b/mm/page_alloc.c @@ -5255,7 +5255,7 @@ unsigned long __alloc_pages_bulk(gfp_t gfp, int preferred_nid, gfp &= gfp_allowed_mask; alloc_gfp = gfp; if (!prepare_alloc_pages(gfp, 0, preferred_nid, nodemask, &ac, &alloc_gfp, &alloc_flags)) - return 0; + return nr_populated; gfp = alloc_gfp; /* Find an allowed local zone that meets the low watermark. */ From 061478438d04779181c2ce4d7ffeeca343a70a98 Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Wed, 14 Jul 2021 21:26:52 -0700 Subject: [PATCH 415/714] mm/page_alloc: further fix __alloc_pages_bulk() return value The author of commit b3b64ebd3822 ("mm/page_alloc: do bulk array bounds check after checking populated elements") was possibly confused by the mixture of return values throughout the function. The API contract is clear that the function "Returns the number of pages on the list or array." It does not list zero as a unique return value with a special meaning. Therefore zero is a plausible return value only if @nr_pages is zero or less. Clean up the return logic to make it clear that the returned value is always the total number of pages in the array/list, not the number of pages that were allocated during this call. The only change in behavior with this patch is the value returned if prepare_alloc_pages() fails. To match the API contract, the number of pages currently in the array/list is returned in this case. The call site in __page_pool_alloc_pages_slow() also seems to be confused on this matter. It should be attended to by someone who is familiar with that code. [mel@techsingularity.net: Return nr_populated if 0 pages are requested] Link: https://lkml.kernel.org/r/20210713152100.10381-4-mgorman@techsingularity.net Signed-off-by: Chuck Lever Signed-off-by: Mel Gorman Acked-by: Jesper Dangaard Brouer Cc: Desmond Cheong Zhi Xi Cc: Zhang Qiang Cc: Yanfei Xu Cc: Matteo Croce Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- mm/page_alloc.c | 14 ++++++++------ 1 file changed, 8 insertions(+), 6 deletions(-) diff --git a/mm/page_alloc.c b/mm/page_alloc.c index 4d79e357a4bab7..3e97e68aef7a89 100644 --- a/mm/page_alloc.c +++ b/mm/page_alloc.c @@ -5221,9 +5221,6 @@ unsigned long __alloc_pages_bulk(gfp_t gfp, int preferred_nid, unsigned int alloc_flags = ALLOC_WMARK_LOW; int nr_populated = 0, nr_account = 0; - if (unlikely(nr_pages <= 0)) - return 0; - /* * Skip populated array elements to determine if any pages need * to be allocated before disabling IRQs. @@ -5231,9 +5228,13 @@ unsigned long __alloc_pages_bulk(gfp_t gfp, int preferred_nid, while (page_array && nr_populated < nr_pages && page_array[nr_populated]) nr_populated++; + /* No pages requested? */ + if (unlikely(nr_pages <= 0)) + goto out; + /* Already populated array? */ if (unlikely(page_array && nr_pages - nr_populated == 0)) - return nr_populated; + goto out; /* Use the single page allocator for one page. */ if (nr_pages - nr_populated == 1) @@ -5255,7 +5256,7 @@ unsigned long __alloc_pages_bulk(gfp_t gfp, int preferred_nid, gfp &= gfp_allowed_mask; alloc_gfp = gfp; if (!prepare_alloc_pages(gfp, 0, preferred_nid, nodemask, &ac, &alloc_gfp, &alloc_flags)) - return nr_populated; + goto out; gfp = alloc_gfp; /* Find an allowed local zone that meets the low watermark. */ @@ -5323,6 +5324,7 @@ unsigned long __alloc_pages_bulk(gfp_t gfp, int preferred_nid, __count_zid_vm_events(PGALLOC, zone_idx(zone), nr_account); zone_statistics(ac.preferred_zoneref->zone, zone, nr_account); +out: return nr_populated; failed_irq: @@ -5338,7 +5340,7 @@ unsigned long __alloc_pages_bulk(gfp_t gfp, int preferred_nid, nr_populated++; } - return nr_populated; + goto out; } EXPORT_SYMBOL_GPL(__alloc_pages_bulk); From ab7965de1725cd8514f0edbced5c2fb793846078 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Wed, 14 Jul 2021 21:26:55 -0700 Subject: [PATCH 416/714] mm: fix the try_to_unmap prototype for !CONFIG_MMU Adjust the nommu stub of try_to_unmap to match the changed protype for the full version. Turn it into an inline instead of a macro to generally improve the type checking. Link: https://lkml.kernel.org/r/20210705053944.885828-1-hch@lst.de Fixes: 1fb08ac63bee ("mm: rmap: make try_to_unmap() void function") Signed-off-by: Christoph Hellwig Reviewed-by: Yang Shi Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/rmap.h | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/include/linux/rmap.h b/include/linux/rmap.h index 83fb86133fe194..c976cc6de25747 100644 --- a/include/linux/rmap.h +++ b/include/linux/rmap.h @@ -291,7 +291,9 @@ static inline int page_referenced(struct page *page, int is_locked, return 0; } -#define try_to_unmap(page, refs) false +static inline void try_to_unmap(struct page *page, enum ttu_flags flags) +{ +} static inline int page_mkclean(struct page *page) { From c52114d9df6a193fba5317933c75bc9bb5f6cf8a Mon Sep 17 00:00:00 2001 From: Alistair Popple Date: Wed, 14 Jul 2021 21:26:58 -0700 Subject: [PATCH 417/714] lib/test_hmm: remove set but unused page variable The HMM selftests use atomic_check_access() to check atomic access to a page has been revoked. It doesn't matter if the page mapping has been removed from the mirrored page tables as that also implies atomic access has been revoked. Therefore remove the unused page variable to fix this compiler warning: lib/test_hmm.c:631:16: warning: variable `page' set but not used [-Wunused-but-set-variable] Link: https://lkml.kernel.org/r/20210706025603.4059-1-apopple@nvidia.com Fixes: b659baea7546 ("mm: selftests for exclusive device memory") Signed-off-by: Alistair Popple Reported-by: Hulk Robot Reported-by: kernel test robot Reported-by: Yang Yingliang Acked-by: Souptick Joarder Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- lib/test_hmm.c | 2 -- 1 file changed, 2 deletions(-) diff --git a/lib/test_hmm.c b/lib/test_hmm.c index 8c55c47236929a..c259842f6d443c 100644 --- a/lib/test_hmm.c +++ b/lib/test_hmm.c @@ -628,10 +628,8 @@ static int dmirror_check_atomic(struct dmirror *dmirror, unsigned long start, for (pfn = start >> PAGE_SHIFT; pfn < (end >> PAGE_SHIFT); pfn++) { void *entry; - struct page *page; entry = xa_load(&dmirror->pt, pfn); - page = xa_untag_pointer(entry); if (xa_pointer_tag(entry) == DPT_XA_TAG_ATOMIC) return -EPERM; } From 16ee572eaf0d09daa4c8a755fdb71e40dbf8562d Mon Sep 17 00:00:00 2001 From: Desmond Cheong Zhi Xi Date: Wed, 14 Jul 2021 21:27:01 -0700 Subject: [PATCH 418/714] hfs: add missing clean-up in hfs_fill_super Patch series "hfs: fix various errors", v2. This series ultimately aims to address a lockdep warning in hfs_find_init reported by Syzbot [1]. The work done for this led to the discovery of another bug, and the Syzkaller repro test also reveals an invalid memory access error after clearing the lockdep warning. Hence, this series is broken up into three patches: 1. Add a missing call to hfs_find_exit for an error path in hfs_fill_super 2. Fix memory mapping in hfs_bnode_read by fixing calls to kmap 3. Add lock nesting notation to tell lockdep that the observed locking hierarchy is safe This patch (of 3): Before exiting hfs_fill_super, the struct hfs_find_data used in hfs_find_init should be passed to hfs_find_exit to be cleaned up, and to release the lock held on the btree. The call to hfs_find_exit is missing from an error path. We add it back in by consolidating calls to hfs_find_exit for error paths. Link: https://syzkaller.appspot.com/bug?id=f007ef1d7a31a469e3be7aeb0fde0769b18585db [1] Link: https://lkml.kernel.org/r/20210701030756.58760-1-desmondcheongzx@gmail.com Link: https://lkml.kernel.org/r/20210701030756.58760-2-desmondcheongzx@gmail.com Signed-off-by: Desmond Cheong Zhi Xi Reviewed-by: Viacheslav Dubeyko Cc: Gustavo A. R. Silva Cc: Al Viro Cc: Shuah Khan Cc: Greg Kroah-Hartman Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- fs/hfs/super.c | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/fs/hfs/super.c b/fs/hfs/super.c index 44d07c9e3a7f03..12d9bae393631c 100644 --- a/fs/hfs/super.c +++ b/fs/hfs/super.c @@ -420,14 +420,12 @@ static int hfs_fill_super(struct super_block *sb, void *data, int silent) if (!res) { if (fd.entrylength > sizeof(rec) || fd.entrylength < 0) { res = -EIO; - goto bail; + goto bail_hfs_find; } hfs_bnode_read(fd.bnode, &rec, fd.entryoffset, fd.entrylength); } - if (res) { - hfs_find_exit(&fd); - goto bail_no_root; - } + if (res) + goto bail_hfs_find; res = -EINVAL; root_inode = hfs_iget(sb, &fd.search_key->cat, &rec); hfs_find_exit(&fd); @@ -443,6 +441,8 @@ static int hfs_fill_super(struct super_block *sb, void *data, int silent) /* everything's okay */ return 0; +bail_hfs_find: + hfs_find_exit(&fd); bail_no_root: pr_err("get root inode failed\n"); bail: From 54a5ead6f5e2b47131a7385d0c0af18e7b89cb02 Mon Sep 17 00:00:00 2001 From: Desmond Cheong Zhi Xi Date: Wed, 14 Jul 2021 21:27:05 -0700 Subject: [PATCH 419/714] hfs: fix high memory mapping in hfs_bnode_read Pages that we read in hfs_bnode_read need to be kmapped into kernel address space. However, currently only the 0th page is kmapped. If the given offset + length exceeds this 0th page, then we have an invalid memory access. To fix this, we kmap relevant pages one by one and copy their relevant portions of data. An example of invalid memory access occurring without this fix can be seen in the following crash report: ================================================================== BUG: KASAN: use-after-free in memcpy include/linux/fortify-string.h:191 [inline] BUG: KASAN: use-after-free in hfs_bnode_read+0xc4/0xe0 fs/hfs/bnode.c:26 Read of size 2 at addr ffff888125fdcffe by task syz-executor5/4634 CPU: 0 PID: 4634 Comm: syz-executor5 Not tainted 5.13.0-syzkaller #0 Hardware name: Google Google Compute Engine/Google Compute Engine, BIOS Google 01/01/2011 Call Trace: __dump_stack lib/dump_stack.c:79 [inline] dump_stack+0x195/0x1f8 lib/dump_stack.c:120 print_address_description.constprop.0+0x1d/0x110 mm/kasan/report.c:233 __kasan_report mm/kasan/report.c:419 [inline] kasan_report.cold+0x7b/0xd4 mm/kasan/report.c:436 check_region_inline mm/kasan/generic.c:180 [inline] kasan_check_range+0x154/0x1b0 mm/kasan/generic.c:186 memcpy+0x24/0x60 mm/kasan/shadow.c:65 memcpy include/linux/fortify-string.h:191 [inline] hfs_bnode_read+0xc4/0xe0 fs/hfs/bnode.c:26 hfs_bnode_read_u16 fs/hfs/bnode.c:34 [inline] hfs_bnode_find+0x880/0xcc0 fs/hfs/bnode.c:365 hfs_brec_find+0x2d8/0x540 fs/hfs/bfind.c:126 hfs_brec_read+0x27/0x120 fs/hfs/bfind.c:165 hfs_cat_find_brec+0x19a/0x3b0 fs/hfs/catalog.c:194 hfs_fill_super+0xc13/0x1460 fs/hfs/super.c:419 mount_bdev+0x331/0x3f0 fs/super.c:1368 hfs_mount+0x35/0x40 fs/hfs/super.c:457 legacy_get_tree+0x10c/0x220 fs/fs_context.c:592 vfs_get_tree+0x93/0x300 fs/super.c:1498 do_new_mount fs/namespace.c:2905 [inline] path_mount+0x13f5/0x20e0 fs/namespace.c:3235 do_mount fs/namespace.c:3248 [inline] __do_sys_mount fs/namespace.c:3456 [inline] __se_sys_mount fs/namespace.c:3433 [inline] __x64_sys_mount+0x2b8/0x340 fs/namespace.c:3433 do_syscall_64+0x37/0xc0 arch/x86/entry/common.c:47 entry_SYSCALL_64_after_hwframe+0x44/0xae RIP: 0033:0x45e63a Code: 48 c7 c2 bc ff ff ff f7 d8 64 89 02 b8 ff ff ff ff eb d2 e8 88 04 00 00 0f 1f 84 00 00 00 00 00 49 89 ca b8 a5 00 00 00 0f 05 <48> 3d 01 f0 ff ff 73 01 c3 48 c7 c1 bc ff ff ff f7 d8 64 89 01 48 RSP: 002b:00007f9404d410d8 EFLAGS: 00000246 ORIG_RAX: 00000000000000a5 RAX: ffffffffffffffda RBX: 0000000020000248 RCX: 000000000045e63a RDX: 0000000020000000 RSI: 0000000020000100 RDI: 00007f9404d41120 RBP: 00007f9404d41120 R08: 00000000200002c0 R09: 0000000020000000 R10: 0000000000000000 R11: 0000000000000246 R12: 0000000000000003 R13: 0000000000000003 R14: 00000000004ad5d8 R15: 0000000000000000 The buggy address belongs to the page: page:00000000dadbcf3e refcount:0 mapcount:0 mapping:0000000000000000 index:0x1 pfn:0x125fdc flags: 0x2fffc0000000000(node=0|zone=2|lastcpupid=0x3fff) raw: 02fffc0000000000 ffffea000497f748 ffffea000497f6c8 0000000000000000 raw: 0000000000000001 0000000000000000 00000000ffffffff 0000000000000000 page dumped because: kasan: bad access detected Memory state around the buggy address: ffff888125fdce80: ff ff ff ff ff ff ff ff ff ff ff ff ff ff ff ff ffff888125fdcf00: ff ff ff ff ff ff ff ff ff ff ff ff ff ff ff ff >ffff888125fdcf80: ff ff ff ff ff ff ff ff ff ff ff ff ff ff ff ff ^ ffff888125fdd000: ff ff ff ff ff ff ff ff ff ff ff ff ff ff ff ff ffff888125fdd080: ff ff ff ff ff ff ff ff ff ff ff ff ff ff ff ff ================================================================== Link: https://lkml.kernel.org/r/20210701030756.58760-3-desmondcheongzx@gmail.com Signed-off-by: Desmond Cheong Zhi Xi Reviewed-by: Viacheslav Dubeyko Cc: Al Viro Cc: Greg Kroah-Hartman Cc: Gustavo A. R. Silva Cc: Shuah Khan Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- fs/hfs/bnode.c | 25 ++++++++++++++++++++----- 1 file changed, 20 insertions(+), 5 deletions(-) diff --git a/fs/hfs/bnode.c b/fs/hfs/bnode.c index b63a4df7327b6d..c0a73a6ffb28bd 100644 --- a/fs/hfs/bnode.c +++ b/fs/hfs/bnode.c @@ -15,16 +15,31 @@ #include "btree.h" -void hfs_bnode_read(struct hfs_bnode *node, void *buf, - int off, int len) +void hfs_bnode_read(struct hfs_bnode *node, void *buf, int off, int len) { struct page *page; + int pagenum; + int bytes_read; + int bytes_to_read; + void *vaddr; off += node->page_offset; - page = node->page[0]; + pagenum = off >> PAGE_SHIFT; + off &= ~PAGE_MASK; /* compute page offset for the first page */ - memcpy(buf, kmap(page) + off, len); - kunmap(page); + for (bytes_read = 0; bytes_read < len; bytes_read += bytes_to_read) { + if (pagenum >= node->tree->pages_per_bnode) + break; + page = node->page[pagenum]; + bytes_to_read = min_t(int, len - bytes_read, PAGE_SIZE - off); + + vaddr = kmap_atomic(page); + memcpy(buf + bytes_read, vaddr + off, bytes_to_read); + kunmap_atomic(vaddr); + + pagenum++; + off = 0; /* page offset only applies to the first page */ + } } u16 hfs_bnode_read_u16(struct hfs_bnode *node, int off) From b3b2177a2d795e35dc11597b2609eb1e7e57e570 Mon Sep 17 00:00:00 2001 From: Desmond Cheong Zhi Xi Date: Wed, 14 Jul 2021 21:27:08 -0700 Subject: [PATCH 420/714] hfs: add lock nesting notation to hfs_find_init Syzbot reports a possible recursive lock in [1]. This happens due to missing lock nesting information. From the logs, we see that a call to hfs_fill_super is made to mount the hfs filesystem. While searching for the root inode, the lock on the catalog btree is grabbed. Then, when the parent of the root isn't found, a call to __hfs_bnode_create is made to create the parent of the root. This eventually leads to a call to hfs_ext_read_extent which grabs a lock on the extents btree. Since the order of locking is catalog btree -> extents btree, this lock hierarchy does not lead to a deadlock. To tell lockdep that this locking is safe, we add nesting notation to distinguish between catalog btrees, extents btrees, and attributes btrees (for HFS+). This has already been done in hfsplus. Link: https://syzkaller.appspot.com/bug?id=f007ef1d7a31a469e3be7aeb0fde0769b18585db [1] Link: https://lkml.kernel.org/r/20210701030756.58760-4-desmondcheongzx@gmail.com Signed-off-by: Desmond Cheong Zhi Xi Reported-by: syzbot+b718ec84a87b7e73ade4@syzkaller.appspotmail.com Tested-by: syzbot+b718ec84a87b7e73ade4@syzkaller.appspotmail.com Reviewed-by: Viacheslav Dubeyko Cc: Al Viro Cc: Greg Kroah-Hartman Cc: Gustavo A. R. Silva Cc: Shuah Khan Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- fs/hfs/bfind.c | 14 +++++++++++++- fs/hfs/btree.h | 7 +++++++ 2 files changed, 20 insertions(+), 1 deletion(-) diff --git a/fs/hfs/bfind.c b/fs/hfs/bfind.c index 4af318fbda774c..ef9498a6e88acd 100644 --- a/fs/hfs/bfind.c +++ b/fs/hfs/bfind.c @@ -25,7 +25,19 @@ int hfs_find_init(struct hfs_btree *tree, struct hfs_find_data *fd) fd->key = ptr + tree->max_key_len + 2; hfs_dbg(BNODE_REFS, "find_init: %d (%p)\n", tree->cnid, __builtin_return_address(0)); - mutex_lock(&tree->tree_lock); + switch (tree->cnid) { + case HFS_CAT_CNID: + mutex_lock_nested(&tree->tree_lock, CATALOG_BTREE_MUTEX); + break; + case HFS_EXT_CNID: + mutex_lock_nested(&tree->tree_lock, EXTENTS_BTREE_MUTEX); + break; + case HFS_ATTR_CNID: + mutex_lock_nested(&tree->tree_lock, ATTR_BTREE_MUTEX); + break; + default: + return -EINVAL; + } return 0; } diff --git a/fs/hfs/btree.h b/fs/hfs/btree.h index 4ba45caf593929..0e6baee932453b 100644 --- a/fs/hfs/btree.h +++ b/fs/hfs/btree.h @@ -13,6 +13,13 @@ typedef int (*btree_keycmp)(const btree_key *, const btree_key *); #define NODE_HASH_SIZE 256 +/* B-tree mutex nested subclasses */ +enum hfs_btree_mutex_classes { + CATALOG_BTREE_MUTEX, + EXTENTS_BTREE_MUTEX, + ATTR_BTREE_MUTEX, +}; + /* A HFS BTree held in memory */ struct hfs_btree { struct super_block *sb; From d08af0a59684e18a51aa4bfd24c658994ea3fc5b Mon Sep 17 00:00:00 2001 From: Joao Martins Date: Wed, 14 Jul 2021 21:27:11 -0700 Subject: [PATCH 421/714] mm/hugetlb: fix refs calculation from unaligned @vaddr Commit 82e5d378b0e47 ("mm/hugetlb: refactor subpage recording") refactored the count of subpages but missed an edge case when @vaddr is not aligned to PAGE_SIZE e.g. when close to vma->vm_end. It would then errousnly set @refs to 0 and record_subpages_vmas() wouldn't set the @pages array element to its value, consequently causing the reported null-deref by syzbot. Fix it by aligning down @vaddr by PAGE_SIZE in @refs calculation. Link: https://lkml.kernel.org/r/20210713152440.28650-1-joao.m.martins@oracle.com Fixes: 82e5d378b0e47 ("mm/hugetlb: refactor subpage recording") Reported-by: syzbot+a3fcd59df1b372066f5a@syzkaller.appspotmail.com Signed-off-by: Joao Martins Reviewed-by: Mike Kravetz Cc: Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- mm/hugetlb.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/mm/hugetlb.c b/mm/hugetlb.c index 924553aa8f789a..dfc940d5221dca 100644 --- a/mm/hugetlb.c +++ b/mm/hugetlb.c @@ -5440,8 +5440,9 @@ long follow_hugetlb_page(struct mm_struct *mm, struct vm_area_struct *vma, continue; } - refs = min3(pages_per_huge_page(h) - pfn_offset, - (vma->vm_end - vaddr) >> PAGE_SHIFT, remainder); + /* vaddr may not be aligned to PAGE_SIZE */ + refs = min3(pages_per_huge_page(h) - pfn_offset, remainder, + (vma->vm_end - ALIGN_DOWN(vaddr, PAGE_SIZE)) >> PAGE_SHIFT); if (pages || vmas) record_subpages_vmas(mem_map_offset(page, pfn_offset), From e7efc2ce3d0789cd7c21b70ff00cd7838d382639 Mon Sep 17 00:00:00 2001 From: Colin Ian King Date: Wed, 14 Jul 2021 16:23:43 +0100 Subject: [PATCH 422/714] liquidio: Fix unintentional sign extension issue on left shift of u16 Shifting the u16 integer oct->pcie_port by CN23XX_PKT_INPUT_CTL_MAC_NUM_POS (29) bits will be promoted to a 32 bit signed int and then sign-extended to a u64. In the cases where oct->pcie_port where bit 2 is set (e.g. 3..7) the shifted value will be sign extended and the top 32 bits of the result will be set. Fix this by casting the u16 values to a u64 before the 29 bit left shift. Addresses-Coverity: ("Unintended sign extension") Fixes: 3451b97cce2d ("liquidio: CN23XX register setup") Signed-off-by: Colin Ian King Signed-off-by: David S. Miller --- drivers/net/ethernet/cavium/liquidio/cn23xx_pf_device.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/ethernet/cavium/liquidio/cn23xx_pf_device.c b/drivers/net/ethernet/cavium/liquidio/cn23xx_pf_device.c index 4cddd628d41b24..9ed3d1ab2ca580 100644 --- a/drivers/net/ethernet/cavium/liquidio/cn23xx_pf_device.c +++ b/drivers/net/ethernet/cavium/liquidio/cn23xx_pf_device.c @@ -420,7 +420,7 @@ static int cn23xx_pf_setup_global_input_regs(struct octeon_device *oct) * bits 32:47 indicate the PVF num. */ for (q_no = 0; q_no < ern; q_no++) { - reg_val = oct->pcie_port << CN23XX_PKT_INPUT_CTL_MAC_NUM_POS; + reg_val = (u64)oct->pcie_port << CN23XX_PKT_INPUT_CTL_MAC_NUM_POS; /* for VF assigned queues. */ if (q_no < oct->sriov_info.pf_srn) { From 65875073eddd24d7b3968c1501ef29277398dc7b Mon Sep 17 00:00:00 2001 From: Qitao Xu Date: Wed, 14 Jul 2021 22:59:23 -0700 Subject: [PATCH 423/714] net: use %px to print skb address in trace_netif_receive_skb The print format of skb adress in tracepoint class net_dev_template is changed to %px from %p, because we want to use skb address as a quick way to identify a packet. Note, trace ring buffer is only accessible to privileged users, it is safe to use a real kernel address here. Reviewed-by: Cong Wang Signed-off-by: Qitao Xu Signed-off-by: David S. Miller --- include/trace/events/net.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/include/trace/events/net.h b/include/trace/events/net.h index 2399073c3afc60..78c448c6ab4c5c 100644 --- a/include/trace/events/net.h +++ b/include/trace/events/net.h @@ -136,7 +136,7 @@ DECLARE_EVENT_CLASS(net_dev_template, __assign_str(name, skb->dev->name); ), - TP_printk("dev=%s skbaddr=%p len=%u", + TP_printk("dev=%s skbaddr=%px len=%u", __get_str(name), __entry->skbaddr, __entry->len) ) From 851f36e40962408309ad2665bf0056c19a97881c Mon Sep 17 00:00:00 2001 From: Qitao Xu Date: Wed, 14 Jul 2021 23:00:21 -0700 Subject: [PATCH 424/714] net_sched: use %px to print skb address in trace_qdisc_dequeue() Print format of skbaddr is changed to %px from %p, because we want to use skb address as a quick way to identify a packet. Note, trace ring buffer is only accessible to privileged users, it is safe to use a real kernel address here. Reviewed-by: Cong Wang Signed-off-by: Qitao Xu Signed-off-by: David S. Miller --- include/trace/events/qdisc.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/include/trace/events/qdisc.h b/include/trace/events/qdisc.h index 330d32d84485b9..58209557cb3a52 100644 --- a/include/trace/events/qdisc.h +++ b/include/trace/events/qdisc.h @@ -41,7 +41,7 @@ TRACE_EVENT(qdisc_dequeue, __entry->txq_state = txq->state; ), - TP_printk("dequeue ifindex=%d qdisc handle=0x%X parent=0x%X txq_state=0x%lX packets=%d skbaddr=%p", + TP_printk("dequeue ifindex=%d qdisc handle=0x%X parent=0x%X txq_state=0x%lX packets=%d skbaddr=%px", __entry->ifindex, __entry->handle, __entry->parent, __entry->txq_state, __entry->packets, __entry->skbaddr ) ); From 70713dddf3d25a02d1952f8c5d2688c986d2f2fb Mon Sep 17 00:00:00 2001 From: Qitao Xu Date: Wed, 14 Jul 2021 23:03:24 -0700 Subject: [PATCH 425/714] net_sched: introduce tracepoint trace_qdisc_enqueue() Tracepoint trace_qdisc_enqueue() is introduced to trace skb at the entrance of TC layer on TX side. This is similar to trace_qdisc_dequeue(): 1. For both we only trace successful cases. The failure cases can be traced via trace_kfree_skb(). 2. They are called at entrance or exit of TC layer, not for each ->enqueue() or ->dequeue(). This is intentional, because we want to make trace_qdisc_enqueue() symmetric to trace_qdisc_dequeue(), which is easier to use. The return value of qdisc_enqueue() is not interesting here, we have Qdisc's drop packets in ->dequeue(), it is impossible to trace them even if we have the return value, the only way to trace them is tracing kfree_skb(). We only add information we need to trace ring buffer. If any other information is needed, it is easy to extend it without breaking ABI, see commit 3dd344ea84e1 ("net: tracepoint: exposing sk_family in all tcp:tracepoints"). Reviewed-by: Cong Wang Signed-off-by: Qitao Xu Signed-off-by: David S. Miller --- include/trace/events/qdisc.h | 26 ++++++++++++++++++++++++++ net/core/dev.c | 20 ++++++++++++++++---- 2 files changed, 42 insertions(+), 4 deletions(-) diff --git a/include/trace/events/qdisc.h b/include/trace/events/qdisc.h index 58209557cb3a52..c3006c6b4a875b 100644 --- a/include/trace/events/qdisc.h +++ b/include/trace/events/qdisc.h @@ -46,6 +46,32 @@ TRACE_EVENT(qdisc_dequeue, __entry->txq_state, __entry->packets, __entry->skbaddr ) ); +TRACE_EVENT(qdisc_enqueue, + + TP_PROTO(struct Qdisc *qdisc, const struct netdev_queue *txq, struct sk_buff *skb), + + TP_ARGS(qdisc, txq, skb), + + TP_STRUCT__entry( + __field(struct Qdisc *, qdisc) + __field(void *, skbaddr) + __field(int, ifindex) + __field(u32, handle) + __field(u32, parent) + ), + + TP_fast_assign( + __entry->qdisc = qdisc; + __entry->skbaddr = skb; + __entry->ifindex = txq->dev ? txq->dev->ifindex : 0; + __entry->handle = qdisc->handle; + __entry->parent = qdisc->parent; + ), + + TP_printk("enqueue ifindex=%d qdisc handle=0x%X parent=0x%X skbaddr=%px", + __entry->ifindex, __entry->handle, __entry->parent, __entry->skbaddr) +); + TRACE_EVENT(qdisc_reset, TP_PROTO(struct Qdisc *q), diff --git a/net/core/dev.c b/net/core/dev.c index 64b21f0a20483b..7aeefc467ddc57 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -131,6 +131,7 @@ #include #include #include +#include #include #include #include @@ -3844,6 +3845,18 @@ static void qdisc_pkt_len_init(struct sk_buff *skb) } } +static int dev_qdisc_enqueue(struct sk_buff *skb, struct Qdisc *q, + struct sk_buff **to_free, + struct netdev_queue *txq) +{ + int rc; + + rc = q->enqueue(skb, q, to_free) & NET_XMIT_MASK; + if (rc == NET_XMIT_SUCCESS) + trace_qdisc_enqueue(q, txq, skb); + return rc; +} + static inline int __dev_xmit_skb(struct sk_buff *skb, struct Qdisc *q, struct net_device *dev, struct netdev_queue *txq) @@ -3862,8 +3875,7 @@ static inline int __dev_xmit_skb(struct sk_buff *skb, struct Qdisc *q, * of q->seqlock to protect from racing with requeuing. */ if (unlikely(!nolock_qdisc_is_empty(q))) { - rc = q->enqueue(skb, q, &to_free) & - NET_XMIT_MASK; + rc = dev_qdisc_enqueue(skb, q, &to_free, txq); __qdisc_run(q); qdisc_run_end(q); @@ -3879,7 +3891,7 @@ static inline int __dev_xmit_skb(struct sk_buff *skb, struct Qdisc *q, return NET_XMIT_SUCCESS; } - rc = q->enqueue(skb, q, &to_free) & NET_XMIT_MASK; + rc = dev_qdisc_enqueue(skb, q, &to_free, txq); qdisc_run(q); no_lock_out: @@ -3923,7 +3935,7 @@ static inline int __dev_xmit_skb(struct sk_buff *skb, struct Qdisc *q, qdisc_run_end(q); rc = NET_XMIT_SUCCESS; } else { - rc = q->enqueue(skb, q, &to_free) & NET_XMIT_MASK; + rc = dev_qdisc_enqueue(skb, q, &to_free, txq); if (qdisc_run_begin(q)) { if (unlikely(contended)) { spin_unlock(&q->busylock); From 91091656252f5d6d8c476e0c92776ce9fae7b445 Mon Sep 17 00:00:00 2001 From: Colin Ian King Date: Thu, 15 Jul 2021 13:57:12 +0100 Subject: [PATCH 426/714] s390/bpf: Perform r1 range checking before accessing jit->seen_reg[r1] Currently array jit->seen_reg[r1] is being accessed before the range checking of index r1. The range changing on r1 should be performed first since it will avoid any potential out-of-range accesses on the array seen_reg[] and also it is more optimal to perform checks on r1 before fetching data from the array. Fix this by swapping the order of the checks before the array access. Fixes: 054623105728 ("s390/bpf: Add s390x eBPF JIT compiler backend") Signed-off-by: Colin Ian King Signed-off-by: Daniel Borkmann Tested-by: Ilya Leoshkevich Acked-by: Ilya Leoshkevich Link: https://lore.kernel.org/bpf/20210715125712.24690-1-colin.king@canonical.com --- arch/s390/net/bpf_jit_comp.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/s390/net/bpf_jit_comp.c b/arch/s390/net/bpf_jit_comp.c index 63cae0476bb497..2ae419f5115a5a 100644 --- a/arch/s390/net/bpf_jit_comp.c +++ b/arch/s390/net/bpf_jit_comp.c @@ -112,7 +112,7 @@ static inline void reg_set_seen(struct bpf_jit *jit, u32 b1) { u32 r1 = reg2hex[b1]; - if (!jit->seen_reg[r1] && r1 >= 6 && r1 <= 15) + if (r1 >= 6 && r1 <= 15 && !jit->seen_reg[r1]) jit->seen_reg[r1] = 1; } From 7e6b27a69167f97c56b5437871d29e9722c3e470 Mon Sep 17 00:00:00 2001 From: John Fastabend Date: Mon, 12 Jul 2021 12:55:45 -0700 Subject: [PATCH 427/714] bpf, sockmap: Fix potential memory leak on unlikely error case If skb_linearize is needed and fails we could leak a msg on the error handling. To fix ensure we kfree the msg block before returning error. Found during code review. Fixes: 4363023d2668e ("bpf, sockmap: Avoid failures from skb_to_sgvec when skb has frag_list") Signed-off-by: John Fastabend Signed-off-by: Daniel Borkmann Reviewed-by: Cong Wang Link: https://lore.kernel.org/bpf/20210712195546.423990-2-john.fastabend@gmail.com --- net/core/skmsg.c | 16 +++++++++++----- 1 file changed, 11 insertions(+), 5 deletions(-) diff --git a/net/core/skmsg.c b/net/core/skmsg.c index 9b6160a191f8fe..15d71288e741f9 100644 --- a/net/core/skmsg.c +++ b/net/core/skmsg.c @@ -508,10 +508,8 @@ static int sk_psock_skb_ingress_enqueue(struct sk_buff *skb, if (skb_linearize(skb)) return -EAGAIN; num_sge = skb_to_sgvec(skb, msg->sg.data, 0, skb->len); - if (unlikely(num_sge < 0)) { - kfree(msg); + if (unlikely(num_sge < 0)) return num_sge; - } copied = skb->len; msg->sg.start = 0; @@ -530,6 +528,7 @@ static int sk_psock_skb_ingress(struct sk_psock *psock, struct sk_buff *skb) { struct sock *sk = psock->sk; struct sk_msg *msg; + int err; /* If we are receiving on the same sock skb->sk is already assigned, * skip memory accounting and owner transition seeing it already set @@ -548,7 +547,10 @@ static int sk_psock_skb_ingress(struct sk_psock *psock, struct sk_buff *skb) * into user buffers. */ skb_set_owner_r(skb, sk); - return sk_psock_skb_ingress_enqueue(skb, psock, sk, msg); + err = sk_psock_skb_ingress_enqueue(skb, psock, sk, msg); + if (err < 0) + kfree(msg); + return err; } /* Puts an skb on the ingress queue of the socket already assigned to the @@ -559,12 +561,16 @@ static int sk_psock_skb_ingress_self(struct sk_psock *psock, struct sk_buff *skb { struct sk_msg *msg = kzalloc(sizeof(*msg), __GFP_NOWARN | GFP_ATOMIC); struct sock *sk = psock->sk; + int err; if (unlikely(!msg)) return -EAGAIN; sk_msg_init(msg); skb_set_owner_r(skb, sk); - return sk_psock_skb_ingress_enqueue(skb, psock, sk, msg); + err = sk_psock_skb_ingress_enqueue(skb, psock, sk, msg); + if (err < 0) + kfree(msg); + return err; } static int sk_psock_handle_skb(struct sk_psock *psock, struct sk_buff *skb, From 228a4a7ba8e99bb9ef980b62f71e3be33f4aae69 Mon Sep 17 00:00:00 2001 From: John Fastabend Date: Mon, 12 Jul 2021 12:55:46 -0700 Subject: [PATCH 428/714] bpf, sockmap, tcp: sk_prot needs inuse_idx set for proc stats The proc socket stats use sk_prot->inuse_idx value to record inuse sock stats. We currently do not set this correctly from sockmap side. The result is reading sock stats '/proc/net/sockstat' gives incorrect values. The socket counter is incremented correctly, but because we don't set the counter correctly when we replace sk_prot we may omit the decrement. To get the correct inuse_idx value move the core_initcall that initializes the TCP proto handlers to late_initcall. This way it is initialized after TCP has the chance to assign the inuse_idx value from the register protocol handler. Fixes: 604326b41a6fb ("bpf, sockmap: convert to generic sk_msg interface") Suggested-by: Jakub Sitnicki Signed-off-by: John Fastabend Signed-off-by: Daniel Borkmann Reviewed-by: Cong Wang Link: https://lore.kernel.org/bpf/20210712195546.423990-3-john.fastabend@gmail.com --- net/ipv4/tcp_bpf.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/ipv4/tcp_bpf.c b/net/ipv4/tcp_bpf.c index f26916a62f2569..d3e9386b493eb3 100644 --- a/net/ipv4/tcp_bpf.c +++ b/net/ipv4/tcp_bpf.c @@ -503,7 +503,7 @@ static int __init tcp_bpf_v4_build_proto(void) tcp_bpf_rebuild_protos(tcp_bpf_prots[TCP_BPF_IPV4], &tcp_prot); return 0; } -core_initcall(tcp_bpf_v4_build_proto); +late_initcall(tcp_bpf_v4_build_proto); static int tcp_bpf_assert_proto_ops(struct proto *ops) { From 54ea2f49fd9400dd698c25450be3352b5613b3b4 Mon Sep 17 00:00:00 2001 From: Jakub Sitnicki Date: Wed, 14 Jul 2021 17:47:50 +0200 Subject: [PATCH 429/714] bpf, sockmap, udp: sk_prot needs inuse_idx set for proc stats The proc socket stats use sk_prot->inuse_idx value to record inuse sock stats. We currently do not set this correctly from sockmap side. The result is reading sock stats '/proc/net/sockstat' gives incorrect values. The socket counter is incremented correctly, but because we don't set the counter correctly when we replace sk_prot we may omit the decrement. To get the correct inuse_idx value move the core_initcall that initializes the UDP proto handlers to late_initcall. This way it is initialized after UDP has the chance to assign the inuse_idx value from the register protocol handler. Fixes: edc6741cc660 ("bpf: Add sockmap hooks for UDP sockets") Signed-off-by: Jakub Sitnicki Signed-off-by: Daniel Borkmann Reviewed-by: Cong Wang Acked-by: John Fastabend Link: https://lore.kernel.org/bpf/20210714154750.528206-1-jakub@cloudflare.com --- net/ipv4/udp_bpf.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/ipv4/udp_bpf.c b/net/ipv4/udp_bpf.c index 45b8782aec0cc1..9f5a5cdc38e646 100644 --- a/net/ipv4/udp_bpf.c +++ b/net/ipv4/udp_bpf.c @@ -134,7 +134,7 @@ static int __init udp_bpf_v4_build_proto(void) udp_bpf_rebuild_protos(&udp_bpf_prots[UDP_BPF_IPV4], &udp_prot); return 0; } -core_initcall(udp_bpf_v4_build_proto); +late_initcall(udp_bpf_v4_build_proto); int udp_bpf_update_proto(struct sock *sk, struct sk_psock *psock, bool restore) { From d444b06e40855219ef38b5e9286db16d435f06dc Mon Sep 17 00:00:00 2001 From: Tobias Klauser Date: Thu, 15 Jul 2021 13:06:09 +0200 Subject: [PATCH 430/714] bpftool: Check malloc return value in mount_bpffs_for_pin Fix and add a missing NULL check for the prior malloc() call. Fixes: 49a086c201a9 ("bpftool: implement prog load command") Signed-off-by: Tobias Klauser Signed-off-by: Daniel Borkmann Reviewed-by: Quentin Monnet Acked-by: Roman Gushchin Link: https://lore.kernel.org/bpf/20210715110609.29364-1-tklauser@distanz.ch --- tools/bpf/bpftool/common.c | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/tools/bpf/bpftool/common.c b/tools/bpf/bpftool/common.c index 1828bba19020d6..dc6daa193557a9 100644 --- a/tools/bpf/bpftool/common.c +++ b/tools/bpf/bpftool/common.c @@ -222,6 +222,11 @@ int mount_bpffs_for_pin(const char *name) int err = 0; file = malloc(strlen(name) + 1); + if (!file) { + p_err("mem alloc failed"); + return -1; + } + strcpy(file, name); dir = dirname(file); From 991e634360f2622a683b48dfe44fe6d9cb765a09 Mon Sep 17 00:00:00 2001 From: Ziyang Xuan Date: Thu, 15 Jul 2021 20:22:04 +0800 Subject: [PATCH 431/714] net: fix uninit-value in caif_seqpkt_sendmsg When nr_segs equal to zero in iovec_from_user, the object msg->msg_iter.iov is uninit stack memory in caif_seqpkt_sendmsg which is defined in ___sys_sendmsg. So we cann't just judge msg->msg_iter.iov->base directlly. We can use nr_segs to judge msg in caif_seqpkt_sendmsg whether has data buffers. ===================================================== BUG: KMSAN: uninit-value in caif_seqpkt_sendmsg+0x693/0xf60 net/caif/caif_socket.c:542 Call Trace: __dump_stack lib/dump_stack.c:77 [inline] dump_stack+0x1c9/0x220 lib/dump_stack.c:118 kmsan_report+0xf7/0x1e0 mm/kmsan/kmsan_report.c:118 __msan_warning+0x58/0xa0 mm/kmsan/kmsan_instr.c:215 caif_seqpkt_sendmsg+0x693/0xf60 net/caif/caif_socket.c:542 sock_sendmsg_nosec net/socket.c:652 [inline] sock_sendmsg net/socket.c:672 [inline] ____sys_sendmsg+0x12b6/0x1350 net/socket.c:2343 ___sys_sendmsg net/socket.c:2397 [inline] __sys_sendmmsg+0x808/0xc90 net/socket.c:2480 __compat_sys_sendmmsg net/compat.c:656 [inline] Reported-by: syzbot+09a5d591c1f98cf5efcb@syzkaller.appspotmail.com Link: https://syzkaller.appspot.com/bug?id=1ace85e8fc9b0d5a45c08c2656c3e91762daa9b8 Fixes: bece7b2398d0 ("caif: Rewritten socket implementation") Signed-off-by: Ziyang Xuan Signed-off-by: David S. Miller --- net/caif/caif_socket.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/net/caif/caif_socket.c b/net/caif/caif_socket.c index 647554c9813b98..e12fd3cad61942 100644 --- a/net/caif/caif_socket.c +++ b/net/caif/caif_socket.c @@ -539,7 +539,8 @@ static int caif_seqpkt_sendmsg(struct socket *sock, struct msghdr *msg, goto err; ret = -EINVAL; - if (unlikely(msg->msg_iter.iov->iov_base == NULL)) + if (unlikely(msg->msg_iter.nr_segs == 0) || + unlikely(msg->msg_iter.iov->iov_base == NULL)) goto err; noblock = msg->msg_flags & MSG_DONTWAIT; From 55cef88bbf12f3bfbe5c2379a8868a034707e755 Mon Sep 17 00:00:00 2001 From: Yoshitaka Ikeda Date: Thu, 15 Jul 2021 16:21:32 +0000 Subject: [PATCH 432/714] spi: spi-cadence-quadspi: Fix division by zero warning Fix below division by zero warning: - Added an if statement because buswidth can be zero, resulting in division by zero. - The modified code was based on another driver (atmel-quadspi). [ 0.795337] Division by zero in kernel. : [ 0.834051] [<807fd40c>] (__div0) from [<804e1acc>] (Ldiv0+0x8/0x10) [ 0.839097] [<805f0710>] (cqspi_exec_mem_op) from [<805edb4c>] (spi_mem_exec_op+0x3b0/0x3f8) Fixes: 7512eaf54190 ("spi: cadence-quadspi: Fix dummy cycle calculation when buswidth > 1") Signed-off-by: Yoshitaka Ikeda Link: https://lore.kernel.org/r/ed989af6-da88-4e0b-9ed8-126db6cad2e4@nskint.co.jp Signed-off-by: Mark Brown --- drivers/spi/spi-cadence-quadspi.c | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) diff --git a/drivers/spi/spi-cadence-quadspi.c b/drivers/spi/spi-cadence-quadspi.c index 7a00346ff9b92b..13d1f0ce618eb8 100644 --- a/drivers/spi/spi-cadence-quadspi.c +++ b/drivers/spi/spi-cadence-quadspi.c @@ -307,11 +307,13 @@ static unsigned int cqspi_calc_rdreg(struct cqspi_flash_pdata *f_pdata) static unsigned int cqspi_calc_dummy(const struct spi_mem_op *op, bool dtr) { - unsigned int dummy_clk; + unsigned int dummy_clk = 0; - dummy_clk = op->dummy.nbytes * (8 / op->dummy.buswidth); - if (dtr) - dummy_clk /= 2; + if (op->dummy.buswidth && op->dummy.nbytes) { + dummy_clk = op->dummy.nbytes * (8 / op->dummy.buswidth); + if (dtr) + dummy_clk /= 2; + } return dummy_clk; } From a1c9ca5f65c9acfd7c02474b9d5cacbd7ea288df Mon Sep 17 00:00:00 2001 From: Randy Dunlap Date: Thu, 15 Jul 2021 11:55:31 -0700 Subject: [PATCH 433/714] EDAC/igen6: fix core dependency AGAIN My previous patch had a typo/thinko which prevents this driver from being enabled: change X64_64 to X86_64. Fixes: 0a9ece9ba154 ("EDAC/igen6: fix core dependency") Signed-off-by: Randy Dunlap Cc: Qiuxu Zhuo Cc: Borislav Petkov Cc: Mauro Carvalho Chehab Cc: linux-edac@vger.kernel.org Cc: bowsingbetee Cc: stable@vger.kernel.org Signed-off-by: Tony Luck Signed-off-by: Linus Torvalds --- drivers/edac/Kconfig | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/edac/Kconfig b/drivers/edac/Kconfig index 91164c5f0757db..2fc4c3f91fd546 100644 --- a/drivers/edac/Kconfig +++ b/drivers/edac/Kconfig @@ -271,7 +271,7 @@ config EDAC_PND2 config EDAC_IGEN6 tristate "Intel client SoC Integrated MC" depends on PCI && PCI_MMCONFIG && ARCH_HAVE_NMI_SAFE_CMPXCHG - depends on X64_64 && X86_MCE_INTEL + depends on X86_64 && X86_MCE_INTEL help Support for error detection and correction on the Intel client SoC Integrated Memory Controller using In-Band ECC IP. From 6206b7981a36476f4695d661ae139f7db36a802d Mon Sep 17 00:00:00 2001 From: Jia He Date: Thu, 15 Jul 2021 16:08:21 +0800 Subject: [PATCH 434/714] qed: fix possible unpaired spin_{un}lock_bh in _qed_mcp_cmd_and_union() Liajian reported a bug_on hit on a ThunderX2 arm64 server with FastLinQ QL41000 ethernet controller: BUG: scheduling while atomic: kworker/0:4/531/0x00000200 [qed_probe:488()]hw prepare failed kernel BUG at mm/vmalloc.c:2355! Internal error: Oops - BUG: 0 [#1] SMP CPU: 0 PID: 531 Comm: kworker/0:4 Tainted: G W 5.4.0-77-generic #86-Ubuntu pstate: 00400009 (nzcv daif +PAN -UAO) Call trace: vunmap+0x4c/0x50 iounmap+0x48/0x58 qed_free_pci+0x60/0x80 [qed] qed_probe+0x35c/0x688 [qed] __qede_probe+0x88/0x5c8 [qede] qede_probe+0x60/0xe0 [qede] local_pci_probe+0x48/0xa0 work_for_cpu_fn+0x24/0x38 process_one_work+0x1d0/0x468 worker_thread+0x238/0x4e0 kthread+0xf0/0x118 ret_from_fork+0x10/0x18 In this case, qed_hw_prepare() returns error due to hw/fw error, but in theory work queue should be in process context instead of interrupt. The root cause might be the unpaired spin_{un}lock_bh() in _qed_mcp_cmd_and_union(), which causes botton half is disabled incorrectly. Reported-by: Lijian Zhang Signed-off-by: Jia He Signed-off-by: David S. Miller --- drivers/net/ethernet/qlogic/qed/qed_mcp.c | 23 +++++++++++++++++------ 1 file changed, 17 insertions(+), 6 deletions(-) diff --git a/drivers/net/ethernet/qlogic/qed/qed_mcp.c b/drivers/net/ethernet/qlogic/qed/qed_mcp.c index 4387292c37e2f9..79d879a5d66363 100644 --- a/drivers/net/ethernet/qlogic/qed/qed_mcp.c +++ b/drivers/net/ethernet/qlogic/qed/qed_mcp.c @@ -474,14 +474,18 @@ _qed_mcp_cmd_and_union(struct qed_hwfn *p_hwfn, spin_lock_bh(&p_hwfn->mcp_info->cmd_lock); - if (!qed_mcp_has_pending_cmd(p_hwfn)) + if (!qed_mcp_has_pending_cmd(p_hwfn)) { + spin_unlock_bh(&p_hwfn->mcp_info->cmd_lock); break; + } rc = qed_mcp_update_pending_cmd(p_hwfn, p_ptt); - if (!rc) + if (!rc) { + spin_unlock_bh(&p_hwfn->mcp_info->cmd_lock); break; - else if (rc != -EAGAIN) + } else if (rc != -EAGAIN) { goto err; + } spin_unlock_bh(&p_hwfn->mcp_info->cmd_lock); @@ -498,6 +502,8 @@ _qed_mcp_cmd_and_union(struct qed_hwfn *p_hwfn, return -EAGAIN; } + spin_lock_bh(&p_hwfn->mcp_info->cmd_lock); + /* Send the mailbox command */ qed_mcp_reread_offsets(p_hwfn, p_ptt); seq_num = ++p_hwfn->mcp_info->drv_mb_seq; @@ -524,14 +530,18 @@ _qed_mcp_cmd_and_union(struct qed_hwfn *p_hwfn, spin_lock_bh(&p_hwfn->mcp_info->cmd_lock); - if (p_cmd_elem->b_is_completed) + if (p_cmd_elem->b_is_completed) { + spin_unlock_bh(&p_hwfn->mcp_info->cmd_lock); break; + } rc = qed_mcp_update_pending_cmd(p_hwfn, p_ptt); - if (!rc) + if (!rc) { + spin_unlock_bh(&p_hwfn->mcp_info->cmd_lock); break; - else if (rc != -EAGAIN) + } else if (rc != -EAGAIN) { goto err; + } spin_unlock_bh(&p_hwfn->mcp_info->cmd_lock); } while (++cnt < max_retries); @@ -554,6 +564,7 @@ _qed_mcp_cmd_and_union(struct qed_hwfn *p_hwfn, return -EAGAIN; } + spin_lock_bh(&p_hwfn->mcp_info->cmd_lock); qed_mcp_cmd_del_elem(p_hwfn, p_cmd_elem); spin_unlock_bh(&p_hwfn->mcp_info->cmd_lock); From a6ecfb39ba9d7316057cea823b196b734f6b18ca Mon Sep 17 00:00:00 2001 From: Dongliang Mu Date: Wed, 14 Jul 2021 17:13:22 +0800 Subject: [PATCH 435/714] usb: hso: fix error handling code of hso_create_net_device The current error handling code of hso_create_net_device is hso_free_net_device, no matter which errors lead to. For example, WARNING in hso_free_net_device [1]. Fix this by refactoring the error handling code of hso_create_net_device by handling different errors by different code. [1] https://syzkaller.appspot.com/bug?id=66eff8d49af1b28370ad342787413e35bbe76efe Reported-by: syzbot+44d53c7255bb1aea22d2@syzkaller.appspotmail.com Fixes: 5fcfb6d0bfcd ("hso: fix bailout in error case of probe") Signed-off-by: Dongliang Mu Signed-off-by: David S. Miller --- drivers/net/usb/hso.c | 33 +++++++++++++++++++++++---------- 1 file changed, 23 insertions(+), 10 deletions(-) diff --git a/drivers/net/usb/hso.c b/drivers/net/usb/hso.c index 63006838bdccd5..dec96e8ab56791 100644 --- a/drivers/net/usb/hso.c +++ b/drivers/net/usb/hso.c @@ -2495,7 +2495,7 @@ static struct hso_device *hso_create_net_device(struct usb_interface *interface, hso_net_init); if (!net) { dev_err(&interface->dev, "Unable to create ethernet device\n"); - goto exit; + goto err_hso_dev; } hso_net = netdev_priv(net); @@ -2508,13 +2508,13 @@ static struct hso_device *hso_create_net_device(struct usb_interface *interface, USB_DIR_IN); if (!hso_net->in_endp) { dev_err(&interface->dev, "Can't find BULK IN endpoint\n"); - goto exit; + goto err_net; } hso_net->out_endp = hso_get_ep(interface, USB_ENDPOINT_XFER_BULK, USB_DIR_OUT); if (!hso_net->out_endp) { dev_err(&interface->dev, "Can't find BULK OUT endpoint\n"); - goto exit; + goto err_net; } SET_NETDEV_DEV(net, &interface->dev); SET_NETDEV_DEVTYPE(net, &hso_type); @@ -2523,18 +2523,18 @@ static struct hso_device *hso_create_net_device(struct usb_interface *interface, for (i = 0; i < MUX_BULK_RX_BUF_COUNT; i++) { hso_net->mux_bulk_rx_urb_pool[i] = usb_alloc_urb(0, GFP_KERNEL); if (!hso_net->mux_bulk_rx_urb_pool[i]) - goto exit; + goto err_mux_bulk_rx; hso_net->mux_bulk_rx_buf_pool[i] = kzalloc(MUX_BULK_RX_BUF_SIZE, GFP_KERNEL); if (!hso_net->mux_bulk_rx_buf_pool[i]) - goto exit; + goto err_mux_bulk_rx; } hso_net->mux_bulk_tx_urb = usb_alloc_urb(0, GFP_KERNEL); if (!hso_net->mux_bulk_tx_urb) - goto exit; + goto err_mux_bulk_rx; hso_net->mux_bulk_tx_buf = kzalloc(MUX_BULK_TX_BUF_SIZE, GFP_KERNEL); if (!hso_net->mux_bulk_tx_buf) - goto exit; + goto err_free_tx_urb; add_net_device(hso_dev); @@ -2542,7 +2542,7 @@ static struct hso_device *hso_create_net_device(struct usb_interface *interface, result = register_netdev(net); if (result) { dev_err(&interface->dev, "Failed to register device\n"); - goto exit; + goto err_free_tx_buf; } hso_log_port(hso_dev); @@ -2550,8 +2550,21 @@ static struct hso_device *hso_create_net_device(struct usb_interface *interface, hso_create_rfkill(hso_dev, interface); return hso_dev; -exit: - hso_free_net_device(hso_dev, true); + +err_free_tx_buf: + remove_net_device(hso_dev); + kfree(hso_net->mux_bulk_tx_buf); +err_free_tx_urb: + usb_free_urb(hso_net->mux_bulk_tx_urb); +err_mux_bulk_rx: + for (i = 0; i < MUX_BULK_RX_BUF_COUNT; i++) { + usb_free_urb(hso_net->mux_bulk_rx_urb_pool[i]); + kfree(hso_net->mux_bulk_rx_buf_pool[i]); + } +err_net: + free_netdev(net); +err_hso_dev: + kfree(hso_dev); return NULL; } From 9a3223b0713369e6258fd8656e0c0a5ed794d186 Mon Sep 17 00:00:00 2001 From: Rob Herring Date: Thu, 15 Jul 2021 12:59:52 -0600 Subject: [PATCH 436/714] ASoC: dt-bindings: renesas: rsnd: Fix incorrect 'port' regex schema A property regex goes under 'patternProperties', not 'properties' schema. Otherwise, the regex is interpretted as a fixed string. Fixes: 17c2d247ddd2 ("ASoC: dt-bindings: renesas: rsnd: tidyup properties") Cc: Mark Brown Cc: Kuninori Morimoto Cc: alsa-devel@alsa-project.org Signed-off-by: Rob Herring Link: https://lore.kernel.org/r/20210715185952.1470138-1-robh@kernel.org Signed-off-by: Mark Brown --- Documentation/devicetree/bindings/sound/renesas,rsnd.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Documentation/devicetree/bindings/sound/renesas,rsnd.yaml b/Documentation/devicetree/bindings/sound/renesas,rsnd.yaml index ee936d1aa724c1..c2930d65728ed2 100644 --- a/Documentation/devicetree/bindings/sound/renesas,rsnd.yaml +++ b/Documentation/devicetree/bindings/sound/renesas,rsnd.yaml @@ -114,7 +114,7 @@ properties: ports: $ref: /schemas/graph.yaml#/properties/ports - properties: + patternProperties: port(@[0-9a-f]+)?: $ref: audio-graph-port.yaml# unevaluatedProperties: false From 0967ebffe098157180a0bbd180ac90348c6e07d7 Mon Sep 17 00:00:00 2001 From: Riccardo Mancini Date: Thu, 15 Jul 2021 18:07:06 +0200 Subject: [PATCH 437/714] perf inject: Fix dso->nsinfo refcounting ASan reports a memory leak of nsinfo during the execution of: # perf test "31: Lookup mmap thread" The leak is caused by a refcounted variable being replaced without dropping the refcount. This patch makes sure that the refcnt of nsinfo is decreased when a refcounted variable is replaced with a new value. Signed-off-by: Riccardo Mancini Fixes: 27c9c3424fc217da ("perf inject: Add --buildid-all option") Cc: Ian Rogers Cc: Jiri Olsa Cc: Mark Rutland Cc: Namhyung Kim Cc: Peter Zijlstra Link: http://lore.kernel.org/lkml/55223bc8821b34ccb01f92ef1401c02b6a32e61f.1626343282.git.rickyman7@gmail.com [ Split from a larger patch ] Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/builtin-inject.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/tools/perf/builtin-inject.c b/tools/perf/builtin-inject.c index 5d6f583e2cd35b..ffd2b25039e36e 100644 --- a/tools/perf/builtin-inject.c +++ b/tools/perf/builtin-inject.c @@ -361,9 +361,10 @@ static struct dso *findnew_dso(int pid, int tid, const char *filename, dso = machine__findnew_dso_id(machine, filename, id); } - if (dso) + if (dso) { + nsinfo__put(dso->nsinfo); dso->nsinfo = nsi; - else + } else nsinfo__put(nsi); thread__put(thread); From 2d6b74baa7147251c30a46c4996e8cc224aa2dc5 Mon Sep 17 00:00:00 2001 From: Riccardo Mancini Date: Thu, 15 Jul 2021 18:07:06 +0200 Subject: [PATCH 438/714] perf map: Fix dso->nsinfo refcounting ASan reports a memory leak of nsinfo during the execution of # perf test "31: Lookup mmap thread" The leak is caused by a refcounted variable being replaced without dropping the refcount. This patch makes sure that the refcnt of nsinfo is decreased whenever a refcounted variable is replaced with a new value. Signed-off-by: Riccardo Mancini Fixes: bf2e710b3cb8445c ("perf maps: Lookup maps in both intitial mountns and inner mountns.") Cc: Ian Rogers Cc: Jiri Olsa Cc: Krister Johansen Cc: Mark Rutland Cc: Namhyung Kim Cc: Peter Zijlstra Link: http://lore.kernel.org/lkml/55223bc8821b34ccb01f92ef1401c02b6a32e61f.1626343282.git.rickyman7@gmail.com [ Split from a larger patch ] Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/map.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/tools/perf/util/map.c b/tools/perf/util/map.c index 8af693d9678cef..72e7f3616157ea 100644 --- a/tools/perf/util/map.c +++ b/tools/perf/util/map.c @@ -192,6 +192,8 @@ struct map *map__new(struct machine *machine, u64 start, u64 len, if (!(prot & PROT_EXEC)) dso__set_loaded(dso); } + + nsinfo__put(dso->nsinfo); dso->nsinfo = nsi; if (build_id__is_defined(bid)) From dedeb4be203b382ba7245d13079bc3b0f6d40c65 Mon Sep 17 00:00:00 2001 From: Riccardo Mancini Date: Thu, 15 Jul 2021 18:07:06 +0200 Subject: [PATCH 439/714] perf probe: Fix dso->nsinfo refcounting ASan reports a memory leak of nsinfo during the execution of: # perf test "31: Lookup mmap thread". The leak is caused by a refcounted variable being replaced without dropping the refcount. This patch makes sure that the refcnt of nsinfo is decreased whenever a refcounted variable is replaced with a new value. Signed-off-by: Riccardo Mancini Fixes: 544abd44c7064c8a ("perf probe: Allow placing uprobes in alternate namespaces.") Cc: Ian Rogers Cc: Jiri Olsa Cc: Krister Johansen Cc: Mark Rutland Cc: Namhyung Kim Cc: Peter Zijlstra Link: http://lore.kernel.org/lkml/55223bc8821b34ccb01f92ef1401c02b6a32e61f.1626343282.git.rickyman7@gmail.com [ Split from a larger patch ] Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/probe-event.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/tools/perf/util/probe-event.c b/tools/perf/util/probe-event.c index c14e1d228e56b1..e05750cea34c3a 100644 --- a/tools/perf/util/probe-event.c +++ b/tools/perf/util/probe-event.c @@ -179,8 +179,10 @@ struct map *get_target_map(const char *target, struct nsinfo *nsi, bool user) struct map *map; map = dso__new_map(target); - if (map && map->dso) + if (map && map->dso) { + nsinfo__put(map->dso->nsinfo); map->dso->nsinfo = nsinfo__get(nsi); + } return map; } else { return kernel_get_module_map(target); From 42db3d9ded555f7148b5695109a7dc8d66f0dde4 Mon Sep 17 00:00:00 2001 From: Riccardo Mancini Date: Thu, 15 Jul 2021 18:07:07 +0200 Subject: [PATCH 440/714] perf env: Fix sibling_dies memory leak ASan reports a memory leak in perf_env while running: # perf test "41: Session topology" Caused by sibling_dies not being freed. This patch adds the required free. Fixes: acae8b36cded0ee6 ("perf header: Add die information in CPU topology") Signed-off-by: Riccardo Mancini Cc: Ian Rogers Cc: Jiri Olsa Cc: Mark Rutland Cc: Namhyung Kim Cc: Peter Zijlstra Link: http://lore.kernel.org/lkml/2140d0b57656e4eb9021ca9772250c24c032924b.1626343282.git.rickyman7@gmail.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/env.c | 1 + 1 file changed, 1 insertion(+) diff --git a/tools/perf/util/env.c b/tools/perf/util/env.c index ebc5e9ad35db21..6c765946ef6f59 100644 --- a/tools/perf/util/env.c +++ b/tools/perf/util/env.c @@ -186,6 +186,7 @@ void perf_env__exit(struct perf_env *env) zfree(&env->cpuid); zfree(&env->cmdline); zfree(&env->cmdline_argv); + zfree(&env->sibling_dies); zfree(&env->sibling_cores); zfree(&env->sibling_threads); zfree(&env->pmu_mappings); From 233f2dc1c284337286f9a64c0152236779a42f6c Mon Sep 17 00:00:00 2001 From: Riccardo Mancini Date: Thu, 15 Jul 2021 18:07:08 +0200 Subject: [PATCH 441/714] perf test session_topology: Delete session->evlist ASan reports a memory leak related to session->evlist while running: # perf test "41: Session topology". When perf_data is in write mode, session->evlist is owned by the caller, which should also take care of deleting it. This patch adds the missing evlist__delete(). Signed-off-by: Riccardo Mancini Fixes: c84974ed9fb67293 ("perf test: Add entry to test cpu topology") Cc: Ian Rogers Cc: Jiri Olsa Cc: Kan Liang Cc: Mark Rutland Cc: Namhyung Kim Cc: Peter Zijlstra Link: http://lore.kernel.org/lkml/822f741f06eb25250fb60686cf30a35f447e9e91.1626343282.git.rickyman7@gmail.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/tests/topology.c | 1 + 1 file changed, 1 insertion(+) diff --git a/tools/perf/tests/topology.c b/tools/perf/tests/topology.c index ec4e3b21b8311f..b5efe675b32174 100644 --- a/tools/perf/tests/topology.c +++ b/tools/perf/tests/topology.c @@ -61,6 +61,7 @@ static int session_write_header(char *path) TEST_ASSERT_VAL("failed to write header", !perf_session__write_header(session, session->evlist, data.file.fd, true)); + evlist__delete(session->evlist); perf_session__delete(session); return 0; From fc56f54f6fcd5337634f4545af6459613129b432 Mon Sep 17 00:00:00 2001 From: Riccardo Mancini Date: Thu, 15 Jul 2021 18:07:09 +0200 Subject: [PATCH 442/714] perf test event_update: Fix memory leak of evlist ASan reports a memory leak when running: # perf test "49: Synthesize attr update" Caused by evlist not being deleted. This patch adds the missing evlist__delete and removes the perf_cpu_map__put since it's already being deleted by evlist__delete. Signed-off-by: Riccardo Mancini Fixes: a6e5281780d1da65 ("perf tools: Add event_update event unit type") Cc: Ian Rogers Cc: Jiri Olsa Cc: Mark Rutland Cc: Namhyung Kim Cc: Peter Zijlstra Link: http://lore.kernel.org/lkml/f7994ad63d248f7645f901132d208fadf9f2b7e4.1626343282.git.rickyman7@gmail.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/tests/event_update.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tools/perf/tests/event_update.c b/tools/perf/tests/event_update.c index 656218179222cc..932ab0740d11ce 100644 --- a/tools/perf/tests/event_update.c +++ b/tools/perf/tests/event_update.c @@ -118,6 +118,6 @@ int test__event_update(struct test *test __maybe_unused, int subtest __maybe_unu TEST_ASSERT_VAL("failed to synthesize attr update cpus", !perf_event__synthesize_event_update_cpus(&tmp.tool, evsel, process_event_cpus)); - perf_cpu_map__put(evsel->core.own_cpus); + evlist__delete(evlist); return 0; } From dccfca926c351ba0893af4c8b481477bdb2881a4 Mon Sep 17 00:00:00 2001 From: Riccardo Mancini Date: Thu, 15 Jul 2021 18:07:10 +0200 Subject: [PATCH 443/714] perf test event_update: Fix memory leak of unit ASan reports a memory leak while running: # perf test "49: Synthesize attr update" Caused by a string being duplicated but never freed. This patch adds the missing free(). Note that evsel->unit is not deallocated together with evsel since it is supposed to be a constant string. Signed-off-by: Riccardo Mancini Fixes: a6e5281780d1da65 ("perf tools: Add event_update event unit type") Cc: Ian Rogers Cc: Jiri Olsa Cc: Mark Rutland Cc: Namhyung Kim Cc: Peter Zijlstra Link: http://lore.kernel.org/lkml/1fbc8158663fb0d4d5392e36bae564f6ad60be3c.1626343282.git.rickyman7@gmail.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/tests/event_update.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/tools/perf/tests/event_update.c b/tools/perf/tests/event_update.c index 932ab0740d11ce..44a50527f9d951 100644 --- a/tools/perf/tests/event_update.c +++ b/tools/perf/tests/event_update.c @@ -88,6 +88,7 @@ int test__event_update(struct test *test __maybe_unused, int subtest __maybe_unu struct evsel *evsel; struct event_name tmp; struct evlist *evlist = evlist__new_default(); + char *unit = strdup("KRAVA"); TEST_ASSERT_VAL("failed to get evlist", evlist); @@ -98,7 +99,7 @@ int test__event_update(struct test *test __maybe_unused, int subtest __maybe_unu perf_evlist__id_add(&evlist->core, &evsel->core, 0, 0, 123); - evsel->unit = strdup("KRAVA"); + evsel->unit = unit; TEST_ASSERT_VAL("failed to synthesize attr update unit", !perf_event__synthesize_event_update_unit(NULL, evsel, process_event_unit)); @@ -118,6 +119,7 @@ int test__event_update(struct test *test __maybe_unused, int subtest __maybe_unu TEST_ASSERT_VAL("failed to synthesize attr update cpus", !perf_event__synthesize_event_update_cpus(&tmp.tool, evsel, process_event_cpus)); + free(unit); evlist__delete(evlist); return 0; } From 581e295a0f6b5c2931d280259fbbfff56959faa9 Mon Sep 17 00:00:00 2001 From: Riccardo Mancini Date: Thu, 15 Jul 2021 18:07:11 +0200 Subject: [PATCH 444/714] perf dso: Fix memory leak in dso__new_map() ASan reports a memory leak when running: # perf test "65: maps__merge_in". The causes of the leaks are two, this patch addresses only the first one, which is related to dso__new_map(). The bug is that dso__new_map() creates a new dso but never decreases the refcount it gets from creating it. This patch adds the missing dso__put(). Signed-off-by: Riccardo Mancini Fixes: d3a7c489c7fd2463 ("perf tools: Reference count struct dso") Cc: Ian Rogers Cc: Jiri Olsa Cc: Mark Rutland Cc: Namhyung Kim Cc: Peter Zijlstra Link: http://lore.kernel.org/lkml/60bfe0cd06e89e2ca33646eb8468d7f5de2ee597.1626343282.git.rickyman7@gmail.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/dso.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/tools/perf/util/dso.c b/tools/perf/util/dso.c index d786cf6b0cfa65..ee15db2be2f434 100644 --- a/tools/perf/util/dso.c +++ b/tools/perf/util/dso.c @@ -1154,8 +1154,10 @@ struct map *dso__new_map(const char *name) struct map *map = NULL; struct dso *dso = dso__new(name); - if (dso) + if (dso) { map = map__new2(0, dso); + dso__put(dso); + } return map; } From 244d1797c8c8e850b8de7992af713aa5c70d5650 Mon Sep 17 00:00:00 2001 From: Riccardo Mancini Date: Thu, 15 Jul 2021 18:07:12 +0200 Subject: [PATCH 445/714] perf test maps__merge_in: Fix memory leak of maps ASan reports a memory leak when running: # perf test "65: maps__merge_in" This is the second and final patch addressing these memory leaks. This time, the problem is simply that the maps object is never destructed. This patch adds the missing maps__exit call. Signed-off-by: Riccardo Mancini Fixes: 79b6bb73f888933c ("perf maps: Merge 'struct maps' with 'struct map_groups'") Cc: Ian Rogers Cc: Jiri Olsa Cc: Mark Rutland Cc: Namhyung Kim Cc: Peter Zijlstra Link: http://lore.kernel.org/lkml/a1a29b97a58738987d150e94d4ebfad0282fb038.1626343282.git.rickyman7@gmail.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/tests/maps.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/tools/perf/tests/maps.c b/tools/perf/tests/maps.c index edcbc70ff9d66e..1ac72919fa3586 100644 --- a/tools/perf/tests/maps.c +++ b/tools/perf/tests/maps.c @@ -116,5 +116,7 @@ int test__maps__merge_in(struct test *t __maybe_unused, int subtest __maybe_unus ret = check_maps(merged3, ARRAY_SIZE(merged3), &maps); TEST_ASSERT_VAL("merge check failed", !ret); + + maps__exit(&maps); return TEST_OK; } From da6b7c6c0626901428245f65712385805e42eba6 Mon Sep 17 00:00:00 2001 From: Riccardo Mancini Date: Thu, 15 Jul 2021 18:07:13 +0200 Subject: [PATCH 446/714] perf env: Fix memory leak of cpu_pmu_caps ASan reports memory leaks while running: # perf test "83: Zstd perf.data compression/decompression" The first of the leaks is caused by env->cpu_pmu_caps not being freed. This patch adds the missing (z)free inside perf_env__exit. Signed-off-by: Riccardo Mancini Fixes: 6f91ea283a1ed23e ("perf header: Support CPU PMU capabilities") Cc: Ian Rogers Cc: Jiri Olsa Cc: Kan Liang Cc: Mark Rutland Cc: Namhyung Kim Cc: Peter Zijlstra Link: http://lore.kernel.org/lkml/6ba036a8220156ec1f3d6be3e5d25920f6145028.1626343282.git.rickyman7@gmail.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/env.c | 1 + 1 file changed, 1 insertion(+) diff --git a/tools/perf/util/env.c b/tools/perf/util/env.c index 6c765946ef6f59..cec2e6cad8aabf 100644 --- a/tools/perf/util/env.c +++ b/tools/perf/util/env.c @@ -191,6 +191,7 @@ void perf_env__exit(struct perf_env *env) zfree(&env->sibling_threads); zfree(&env->pmu_mappings); zfree(&env->cpu); + zfree(&env->cpu_pmu_caps); zfree(&env->numa_map); for (i = 0; i < env->nr_numa_nodes; i++) From a37338aad8c4d8676173ead14e881d2ec308155c Mon Sep 17 00:00:00 2001 From: Riccardo Mancini Date: Thu, 15 Jul 2021 18:07:14 +0200 Subject: [PATCH 447/714] perf report: Free generated help strings for sort option ASan reports the memory leak of the strings allocated by sort_help() when running perf report. This patch changes the returned pointer to char* (instead of const char*), saves it in a temporary variable, and finally deallocates it at function exit. Signed-off-by: Riccardo Mancini Fixes: 702fb9b415e7c99b ("perf report: Show all sort keys in help output") Cc: Andi Kleen Cc: Ian Rogers Cc: Jiri Olsa Cc: Mark Rutland Cc: Namhyung Kim Cc: Peter Zijlstra Link: http://lore.kernel.org/lkml/a38b13f02812a8a6759200b9063c6191337f44d4.1626343282.git.rickyman7@gmail.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/builtin-report.c | 33 ++++++++++++++++++++++----------- tools/perf/util/sort.c | 2 +- tools/perf/util/sort.h | 2 +- 3 files changed, 24 insertions(+), 13 deletions(-) diff --git a/tools/perf/builtin-report.c b/tools/perf/builtin-report.c index 6386af6a261236..dc0364f671b97d 100644 --- a/tools/perf/builtin-report.c +++ b/tools/perf/builtin-report.c @@ -1175,6 +1175,8 @@ int cmd_report(int argc, const char **argv) .annotation_opts = annotation__default_options, .skip_empty = true, }; + char *sort_order_help = sort_help("sort by key(s):"); + char *field_order_help = sort_help("output field(s): overhead period sample "); const struct option options[] = { OPT_STRING('i', "input", &input_name, "file", "input file name"), @@ -1209,9 +1211,9 @@ int cmd_report(int argc, const char **argv) OPT_BOOLEAN(0, "header-only", &report.header_only, "Show only data header."), OPT_STRING('s', "sort", &sort_order, "key[,key2...]", - sort_help("sort by key(s):")), + sort_order_help), OPT_STRING('F', "fields", &field_order, "key[,keys...]", - sort_help("output field(s): overhead period sample ")), + field_order_help), OPT_BOOLEAN(0, "show-cpu-utilization", &symbol_conf.show_cpu_utilization, "Show sample percentage for different cpu modes"), OPT_BOOLEAN_FLAG(0, "showcpuutilization", &symbol_conf.show_cpu_utilization, @@ -1344,11 +1346,11 @@ int cmd_report(int argc, const char **argv) char sort_tmp[128]; if (ret < 0) - return ret; + goto exit; ret = perf_config(report__config, &report); if (ret) - return ret; + goto exit; argc = parse_options(argc, argv, options, report_usage, 0); if (argc) { @@ -1362,8 +1364,10 @@ int cmd_report(int argc, const char **argv) report.symbol_filter_str = argv[0]; } - if (annotate_check_args(&report.annotation_opts) < 0) - return -EINVAL; + if (annotate_check_args(&report.annotation_opts) < 0) { + ret = -EINVAL; + goto exit; + } if (report.mmaps_mode) report.tasks_mode = true; @@ -1377,12 +1381,14 @@ int cmd_report(int argc, const char **argv) if (symbol_conf.vmlinux_name && access(symbol_conf.vmlinux_name, R_OK)) { pr_err("Invalid file: %s\n", symbol_conf.vmlinux_name); - return -EINVAL; + ret = -EINVAL; + goto exit; } if (symbol_conf.kallsyms_name && access(symbol_conf.kallsyms_name, R_OK)) { pr_err("Invalid file: %s\n", symbol_conf.kallsyms_name); - return -EINVAL; + ret = -EINVAL; + goto exit; } if (report.inverted_callchain) @@ -1406,12 +1412,14 @@ int cmd_report(int argc, const char **argv) repeat: session = perf_session__new(&data, false, &report.tool); - if (IS_ERR(session)) - return PTR_ERR(session); + if (IS_ERR(session)) { + ret = PTR_ERR(session); + goto exit; + } ret = evswitch__init(&report.evswitch, session->evlist, stderr); if (ret) - return ret; + goto exit; if (zstd_init(&(session->zstd_data), 0) < 0) pr_warning("Decompression initialization failed. Reported data may be incomplete.\n"); @@ -1646,5 +1654,8 @@ int cmd_report(int argc, const char **argv) zstd_fini(&(session->zstd_data)); perf_session__delete(session); +exit: + free(sort_order_help); + free(field_order_help); return ret; } diff --git a/tools/perf/util/sort.c b/tools/perf/util/sort.c index 88ce47f2547e35..568a88c001c6cb 100644 --- a/tools/perf/util/sort.c +++ b/tools/perf/util/sort.c @@ -3370,7 +3370,7 @@ static void add_hpp_sort_string(struct strbuf *sb, struct hpp_dimension *s, int add_key(sb, s[i].name, llen); } -const char *sort_help(const char *prefix) +char *sort_help(const char *prefix) { struct strbuf sb; char *s; diff --git a/tools/perf/util/sort.h b/tools/perf/util/sort.h index 87a092645aa72e..b67c469aba7958 100644 --- a/tools/perf/util/sort.h +++ b/tools/perf/util/sort.h @@ -302,7 +302,7 @@ void reset_output_field(void); void sort__setup_elide(FILE *fp); void perf_hpp__set_elide(int idx, bool elide); -const char *sort_help(const char *prefix); +char *sort_help(const char *prefix); int report_parse_ignore_callees_opt(const struct option *opt, const char *arg, int unset); From 02e6246f5364d5260a6ea6f92ab6f409058b162f Mon Sep 17 00:00:00 2001 From: Riccardo Mancini Date: Thu, 15 Jul 2021 18:07:15 +0200 Subject: [PATCH 448/714] perf inject: Close inject.output on exit ASan reports a memory leak when running: # perf test "83: Zstd perf.data compression/decompression" which happens inside 'perf inject'. The bug is caused by inject.output never being closed. This patch adds the missing perf_data__close(). Signed-off-by: Riccardo Mancini Fixes: 6ef81c55a2b6584c ("perf session: Return error code for perf_session__new() function on failure") Cc: Ian Rogers Cc: Jiri Olsa Cc: Mamatha Inamdar Cc: Mark Rutland Cc: Namhyung Kim Cc: Peter Zijlstra Link: http://lore.kernel.org/lkml/c06f682afa964687367cf6e92a64ceb49aec76a5.1626343282.git.rickyman7@gmail.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/builtin-inject.c | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/tools/perf/builtin-inject.c b/tools/perf/builtin-inject.c index ffd2b25039e36e..c88c61e7f8ccb6 100644 --- a/tools/perf/builtin-inject.c +++ b/tools/perf/builtin-inject.c @@ -993,8 +993,10 @@ int cmd_inject(int argc, const char **argv) data.path = inject.input_name; inject.session = perf_session__new(&data, inject.output.is_pipe, &inject.tool); - if (IS_ERR(inject.session)) - return PTR_ERR(inject.session); + if (IS_ERR(inject.session)) { + ret = PTR_ERR(inject.session); + goto out_close_output; + } if (zstd_init(&(inject.session->zstd_data), 0) < 0) pr_warning("Decompression initialization failed.\n"); @@ -1036,6 +1038,8 @@ int cmd_inject(int argc, const char **argv) out_delete: zstd_fini(&(inject.session->zstd_data)); perf_session__delete(inject.session); +out_close_output: + perf_data__close(&inject.output); free(inject.itrace_synth_opts.vm_tm_corr_args); return ret; } From 423b9174f5f71fd3d245f4da0feaf958976f66e7 Mon Sep 17 00:00:00 2001 From: Riccardo Mancini Date: Thu, 15 Jul 2021 18:07:16 +0200 Subject: [PATCH 449/714] perf session: Cleanup trace_event ASan reports several memory leaks when running: # perf test "82: Use vfs_getname probe to get syscall args filenames" many of which are related to session->tevent. This patch will solve this problem, then next patch will fix the remaining memory leaks in 'perf script'. This bug is due to a missing deallocation of the trace_event data strutures. This patch adds the missing trace_event__cleanup() in perf_session__delete(). Signed-off-by: Riccardo Mancini Cc: Ian Rogers Cc: Jiri Olsa Cc: Mark Rutland Cc: Namhyung Kim Cc: Peter Zijlstra Link: http://lore.kernel.org/lkml/fa2a3f221d90e47ce4e5b7e2d6e64c3509ddc96a.1626343282.git.rickyman7@gmail.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/session.c | 1 + 1 file changed, 1 insertion(+) diff --git a/tools/perf/util/session.c b/tools/perf/util/session.c index e9c929a39973ac..51f72740291267 100644 --- a/tools/perf/util/session.c +++ b/tools/perf/util/session.c @@ -306,6 +306,7 @@ void perf_session__delete(struct perf_session *session) evlist__delete(session->evlist); perf_data__close(session->data); } + trace_event__cleanup(&session->tevent); free(session); } From 1b1f57cf9e4c8eb16c8f6b2ce12cc5dd3517fc61 Mon Sep 17 00:00:00 2001 From: Riccardo Mancini Date: Thu, 15 Jul 2021 18:07:17 +0200 Subject: [PATCH 450/714] perf script: Release zstd data ASan reports several memory leak while running: # perf test "82: Use vfs_getname probe to get syscall args filenames" One of the leaks is caused by zstd data not being released on exit in perf-script. This patch adds the missing zstd_fini(). Signed-off-by: Riccardo Mancini Fixes: b13b04d9382113f7 ("perf script: Initialize zstd_data") Cc: Ian Rogers Cc: Jiri Olsa Cc: Mark Rutland Cc: Milian Wolff Cc: Namhyung Kim Cc: Peter Zijlstra Link: http://lore.kernel.org/lkml/39388e8cc2f85ca219ea18697a17b7bd8f74b693.1626343282.git.rickyman7@gmail.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/builtin-script.c | 1 + 1 file changed, 1 insertion(+) diff --git a/tools/perf/builtin-script.c b/tools/perf/builtin-script.c index 8c03a9862872d4..bae0e5b72c0e60 100644 --- a/tools/perf/builtin-script.c +++ b/tools/perf/builtin-script.c @@ -4143,6 +4143,7 @@ int cmd_script(int argc, const char **argv) zfree(&script.ptime_range); } + zstd_fini(&(session->zstd_data)); evlist__free_stats(session->evlist); perf_session__delete(session); From faf3ac305d61341c74e5cdd9e41daecce7f67bfe Mon Sep 17 00:00:00 2001 From: Riccardo Mancini Date: Thu, 15 Jul 2021 18:07:18 +0200 Subject: [PATCH 451/714] perf script: Fix memory 'threads' and 'cpus' leaks on exit ASan reports several memory leaks while running: # perf test "82: Use vfs_getname probe to get syscall args filenames" Two of these are caused by some refcounts not being decreased on perf-script exit, namely script.threads and script.cpus. This patch adds the missing __put calls in a new perf_script__exit function, which is called at the end of cmd_script. This patch concludes the fixes of all remaining memory leaks in perf test "82: Use vfs_getname probe to get syscall args filenames". Signed-off-by: Riccardo Mancini Fixes: cfc8874a48599249 ("perf script: Process cpu/threads maps") Cc: Ian Rogers Cc: Jiri Olsa Cc: Mark Rutland Cc: Namhyung Kim Cc: Peter Zijlstra Link: http://lore.kernel.org/lkml/5ee73b19791c6fa9d24c4d57f4ac1a23609400d7.1626343282.git.rickyman7@gmail.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/builtin-script.c | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/tools/perf/builtin-script.c b/tools/perf/builtin-script.c index bae0e5b72c0e60..064da7f3618d39 100644 --- a/tools/perf/builtin-script.c +++ b/tools/perf/builtin-script.c @@ -2601,6 +2601,12 @@ static void perf_script__exit_per_event_dump_stats(struct perf_script *script) } } +static void perf_script__exit(struct perf_script *script) +{ + perf_thread_map__put(script->threads); + perf_cpu_map__put(script->cpus); +} + static int __cmd_script(struct perf_script *script) { int ret; @@ -4146,6 +4152,7 @@ int cmd_script(int argc, const char **argv) zstd_fini(&(session->zstd_data)); evlist__free_stats(session->evlist); perf_session__delete(session); + perf_script__exit(&script); if (script_started) cleanup_scripting(); From f8cbb0f926ae1e1fb5f9e51614e5437560ed4039 Mon Sep 17 00:00:00 2001 From: Riccardo Mancini Date: Thu, 15 Jul 2021 18:07:19 +0200 Subject: [PATCH 452/714] perf lzma: Close lzma stream on exit ASan reports memory leaks when running: # perf test "88: Check open filename arg using perf trace + vfs_getname" One of these is caused by the lzma stream never being closed inside lzma_decompress_to_file(). This patch adds the missing lzma_end(). Signed-off-by: Riccardo Mancini Fixes: 80a32e5b498a7547 ("perf tools: Add lzma decompression support for kernel module") Cc: Ian Rogers Cc: Jiri Olsa Cc: Mark Rutland Cc: Namhyung Kim Cc: Peter Zijlstra Link: http://lore.kernel.org/lkml/aaf50bdce7afe996cfc06e1bbb36e4a2a9b9db93.1626343282.git.rickyman7@gmail.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/lzma.c | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/tools/perf/util/lzma.c b/tools/perf/util/lzma.c index 39062df0262915..51424cdc3b682c 100644 --- a/tools/perf/util/lzma.c +++ b/tools/perf/util/lzma.c @@ -69,7 +69,7 @@ int lzma_decompress_to_file(const char *input, int output_fd) if (ferror(infile)) { pr_err("lzma: read error: %s\n", strerror(errno)); - goto err_fclose; + goto err_lzma_end; } if (feof(infile)) @@ -83,7 +83,7 @@ int lzma_decompress_to_file(const char *input, int output_fd) if (writen(output_fd, buf_out, write_size) != write_size) { pr_err("lzma: write error: %s\n", strerror(errno)); - goto err_fclose; + goto err_lzma_end; } strm.next_out = buf_out; @@ -95,11 +95,13 @@ int lzma_decompress_to_file(const char *input, int output_fd) break; pr_err("lzma: failed %s\n", lzma_strerror(ret)); - goto err_fclose; + goto err_lzma_end; } } err = 0; +err_lzma_end: + lzma_end(&strm); err_fclose: fclose(infile); return err; From 6c7f0ab04707c2882f08d5abb9dc41b54493b61c Mon Sep 17 00:00:00 2001 From: Riccardo Mancini Date: Thu, 15 Jul 2021 18:07:20 +0200 Subject: [PATCH 453/714] perf trace: Free malloc'd trace fields on exit ASan reports several memory leaks running: # perf test "88: Check open filename arg using perf trace + vfs_getname" The first of these leaks is related to struct trace fields never being deallocated. This patch adds the function trace__exit, which is called at the end of cmd_trace, replacing the existing deallocation, which is now moved inside the new function. This function deallocates: - ev_qualifier - ev_qualifier_ids.entries - syscalls.table - sctbl - perfconfig_events Signed-off-by: Riccardo Mancini Cc: Ian Rogers Cc: Jiri Olsa Cc: Mark Rutland Cc: Namhyung Kim Cc: Peter Zijlstra Link: http://lore.kernel.org/lkml/de5945ed5c0cb882cbfa3268567d0bff460ff016.1626343282.git.rickyman7@gmail.com [ Removed needless initialization to zero, missing named initializers are zeroed by the compiler ] Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/builtin-trace.c | 11 ++++++++++- 1 file changed, 10 insertions(+), 1 deletion(-) diff --git a/tools/perf/builtin-trace.c b/tools/perf/builtin-trace.c index 7ec18ff57fc4ae..e239e322e0306d 100644 --- a/tools/perf/builtin-trace.c +++ b/tools/perf/builtin-trace.c @@ -4701,6 +4701,15 @@ static int trace__config(const char *var, const char *value, void *arg) return err; } +static void trace__exit(struct trace *trace) +{ + strlist__delete(trace->ev_qualifier); + free(trace->ev_qualifier_ids.entries); + free(trace->syscalls.table); + syscalltbl__delete(trace->sctbl); + zfree(&trace->perfconfig_events); +} + int cmd_trace(int argc, const char **argv) { const char *trace_usage[] = { @@ -5135,6 +5144,6 @@ int cmd_trace(int argc, const char **argv) if (output_name != NULL) fclose(trace.output); out: - zfree(&trace.perfconfig_events); + trace__exit(&trace); return err; } From f2ebf8ffe7af10bff02d34addbebd9199de65ed2 Mon Sep 17 00:00:00 2001 From: Riccardo Mancini Date: Thu, 15 Jul 2021 18:07:21 +0200 Subject: [PATCH 454/714] perf trace: Free syscall->arg_fmt ASan reports several memory leaks running: # perf test "88: Check open filename arg using perf trace + vfs_getname" The second of these leaks is caused by the arg_fmt field of syscall not being deallocated. This patch adds a new function syscall__exit which is called on all syscalls.table entries in trace__exit, which will free the arg_fmt field. Signed-off-by: Riccardo Mancini Cc: Ian Rogers Cc: Jiri Olsa Cc: Mark Rutland Cc: Namhyung Kim Cc: Peter Zijlstra Link: http://lore.kernel.org/lkml/d68f25c043d30464ac9fa79c3399e18f429bca82.1626343282.git.rickyman7@gmail.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/builtin-trace.c | 16 +++++++++++++++- 1 file changed, 15 insertions(+), 1 deletion(-) diff --git a/tools/perf/builtin-trace.c b/tools/perf/builtin-trace.c index e239e322e0306d..4039ff4c078831 100644 --- a/tools/perf/builtin-trace.c +++ b/tools/perf/builtin-trace.c @@ -2266,6 +2266,14 @@ static void *syscall__augmented_args(struct syscall *sc, struct perf_sample *sam return augmented_args; } +static void syscall__exit(struct syscall *sc) +{ + if (!sc) + return; + + free(sc->arg_fmt); +} + static int trace__sys_enter(struct trace *trace, struct evsel *evsel, union perf_event *event __maybe_unused, struct perf_sample *sample) @@ -4703,9 +4711,15 @@ static int trace__config(const char *var, const char *value, void *arg) static void trace__exit(struct trace *trace) { + int i; + strlist__delete(trace->ev_qualifier); free(trace->ev_qualifier_ids.entries); - free(trace->syscalls.table); + if (trace->syscalls.table) { + for (i = 0; i <= trace->sctbl->syscalls.max_id; i++) + syscall__exit(&trace->syscalls.table[i]); + free(trace->syscalls.table); + } syscalltbl__delete(trace->sctbl); zfree(&trace->perfconfig_events); } From 3cb4d5e00e037c70f239173bdd399a7e6040830f Mon Sep 17 00:00:00 2001 From: Riccardo Mancini Date: Thu, 15 Jul 2021 18:07:22 +0200 Subject: [PATCH 455/714] perf trace: Free syscall tp fields in evsel->priv ASan reports several memory leaks running: # perf test "88: Check open filename arg using perf trace + vfs_getname" The third of these leaks is related to evsel->priv fields of sycalls never being deallocated. This patch adds the function evlist__free_syscall_tp_fields which iterates over all evsels in evlist, matching syscalls, and calling the missing frees. This new function is called at the end of trace__run, right before calling evlist__delete. Signed-off-by: Riccardo Mancini Cc: Ian Rogers Cc: Jiri Olsa Cc: Mark Rutland Cc: Namhyung Kim Cc: Peter Zijlstra Link: http://lore.kernel.org/lkml/46526611904ec5ff2768b59014e3afce8e0197d1.1626343282.git.rickyman7@gmail.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/builtin-trace.c | 17 ++++++++++++++++- 1 file changed, 16 insertions(+), 1 deletion(-) diff --git a/tools/perf/builtin-trace.c b/tools/perf/builtin-trace.c index 4039ff4c078831..0d9e58190f15b5 100644 --- a/tools/perf/builtin-trace.c +++ b/tools/perf/builtin-trace.c @@ -3103,6 +3103,21 @@ static struct evsel *evsel__new_pgfault(u64 config) return evsel; } +static void evlist__free_syscall_tp_fields(struct evlist *evlist) +{ + struct evsel *evsel; + + evlist__for_each_entry(evlist, evsel) { + struct evsel_trace *et = evsel->priv; + + if (!et || !evsel->tp_format || strcmp(evsel->tp_format->system, "syscalls")) + continue; + + free(et->fmt); + free(et); + } +} + static void trace__handle_event(struct trace *trace, union perf_event *event, struct perf_sample *sample) { const u32 type = event->header.type; @@ -4138,7 +4153,7 @@ static int trace__run(struct trace *trace, int argc, const char **argv) out_delete_evlist: trace__symbols__exit(trace); - + evlist__free_syscall_tp_fields(evlist); evlist__delete(evlist); cgroup__put(trace->cgroup); trace->evlist = NULL; From 659ede7d13f1cc37882088deecbc085da285b8f8 Mon Sep 17 00:00:00 2001 From: Riccardo Mancini Date: Thu, 15 Jul 2021 18:07:23 +0200 Subject: [PATCH 456/714] perf trace: Free strings in trace__parse_events_option() ASan reports several memory leaks running: # perf test "88: Check open filename arg using perf trace + vfs_getname" The fourth of these leaks is related to some strings never being freed in trace__parse_events_option. This patch adds the missing frees. Signed-off-by: Riccardo Mancini Cc: Ian Rogers Cc: Jiri Olsa Cc: Mark Rutland Cc: Namhyung Kim Cc: Peter Zijlstra Link: http://lore.kernel.org/lkml/34d08535b11124106b859790549991abff5a7de8.1626343282.git.rickyman7@gmail.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/builtin-trace.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/tools/perf/builtin-trace.c b/tools/perf/builtin-trace.c index 0d9e58190f15b5..9c265fa96011f7 100644 --- a/tools/perf/builtin-trace.c +++ b/tools/perf/builtin-trace.c @@ -4659,6 +4659,9 @@ static int trace__parse_events_option(const struct option *opt, const char *str, err = parse_events_option(&o, lists[0], 0); } out: + free(strace_groups_dir); + free(lists[0]); + free(lists[1]); if (sep) *sep = ','; From 704adfb5a9978462cd861f170201ae2b5e3d3a80 Mon Sep 17 00:00:00 2001 From: "Steven Rostedt (VMware)" Date: Thu, 15 Jul 2021 00:02:06 -0400 Subject: [PATCH 457/714] tracing: Do not reference char * as a string in histograms The histogram logic was allowing events with char * pointers to be used as normal strings. But it was easy to crash the kernel with: # echo 'hist:keys=filename' > events/syscalls/sys_enter_openat/trigger And open some files, and boom! BUG: unable to handle page fault for address: 00007f2ced0c3280 #PF: supervisor read access in kernel mode #PF: error_code(0x0000) - not-present page PGD 1173fa067 P4D 1173fa067 PUD 1171b6067 PMD 1171dd067 PTE 0 Oops: 0000 [#1] PREEMPT SMP CPU: 6 PID: 1810 Comm: cat Not tainted 5.13.0-rc5-test+ #61 Hardware name: Hewlett-Packard HP Compaq Pro 6300 SFF/339A, BIOS K01 v03.03 07/14/2016 RIP: 0010:strlen+0x0/0x20 Code: f6 82 80 2a 0b a9 20 74 11 0f b6 50 01 48 83 c0 01 f6 82 80 2a 0b a9 20 75 ef c3 66 66 2e 0f 1f 84 00 00 00 00 00 0f 1f 40 00 <80> 3f 00 74 10 48 89 f8 48 83 c0 01 80 38 00 75 f7 48 29 f8 c3 RSP: 0018:ffffbdbf81567b50 EFLAGS: 00010246 RAX: 0000000000000003 RBX: ffff93815cdb3800 RCX: ffff9382401a22d0 RDX: 0000000000000100 RSI: 0000000000000000 RDI: 00007f2ced0c3280 RBP: 0000000000000100 R08: ffff9382409ff074 R09: ffffbdbf81567c98 R10: ffff9382409ff074 R11: 0000000000000000 R12: ffff9382409ff074 R13: 0000000000000001 R14: ffff93815a744f00 R15: 00007f2ced0c3280 FS: 00007f2ced0f8580(0000) GS:ffff93825a800000(0000) knlGS:0000000000000000 CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 CR2: 00007f2ced0c3280 CR3: 0000000107069005 CR4: 00000000001706e0 Call Trace: event_hist_trigger+0x463/0x5f0 ? find_held_lock+0x32/0x90 ? sched_clock_cpu+0xe/0xd0 ? lock_release+0x155/0x440 ? kernel_init_free_pages+0x6d/0x90 ? preempt_count_sub+0x9b/0xd0 ? kernel_init_free_pages+0x6d/0x90 ? get_page_from_freelist+0x12c4/0x1680 ? __rb_reserve_next+0xe5/0x460 ? ring_buffer_lock_reserve+0x12a/0x3f0 event_triggers_call+0x52/0xe0 ftrace_syscall_enter+0x264/0x2c0 syscall_trace_enter.constprop.0+0x1ee/0x210 do_syscall_64+0x1c/0x80 entry_SYSCALL_64_after_hwframe+0x44/0xae Where it triggered a fault on strlen(key) where key was the filename. The reason is that filename is a char * to user space, and the histogram code just blindly dereferenced it, with obvious bad results. I originally tried to use strncpy_from_user/kernel_nofault() but found that there's other places that its dereferenced and not worth the effort. Just do not allow "char *" to act like strings. Link: https://lkml.kernel.org/r/20210715000206.025df9d2@rorschach.local.home Cc: Ingo Molnar Cc: Andrew Morton Cc: Masami Hiramatsu Cc: Tzvetomir Stoyanov Cc: stable@vger.kernel.org Acked-by: Namhyung Kim Acked-by: Tom Zanussi Fixes: 79e577cbce4c4 ("tracing: Support string type key properly") Fixes: 5967bd5c4239 ("tracing: Let filter_assign_type() detect FILTER_PTR_STRING") Signed-off-by: Steven Rostedt (VMware) --- kernel/trace/trace_events_hist.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/kernel/trace/trace_events_hist.c b/kernel/trace/trace_events_hist.c index 0207aeed31e662..16a9dfc9fffc3a 100644 --- a/kernel/trace/trace_events_hist.c +++ b/kernel/trace/trace_events_hist.c @@ -1689,7 +1689,9 @@ static struct hist_field *create_hist_field(struct hist_trigger_data *hist_data, if (WARN_ON_ONCE(!field)) goto out; - if (is_string_field(field)) { + /* Pointers to strings are just pointers and dangerous to dereference */ + if (is_string_field(field) && + (field->filter_type != FILTER_PTR_STRING)) { flags |= HIST_FIELD_FL_STRING; hist_field->size = MAX_FILTER_STR_VAL; @@ -4495,8 +4497,6 @@ static inline void add_to_key(char *compound_key, void *key, field = key_field->field; if (field->filter_type == FILTER_DYN_STRING) size = *(u32 *)(rec + field->offset) >> 16; - else if (field->filter_type == FILTER_PTR_STRING) - size = strlen(key); else if (field->filter_type == FILTER_STATIC_STRING) size = field->size; From 507345b5ae6a57b7ecd7550ff39282ed20de7b8d Mon Sep 17 00:00:00 2001 From: Paulo Alcantara Date: Mon, 12 Jul 2021 12:38:24 -0300 Subject: [PATCH 458/714] cifs: handle reconnect of tcon when there is no cached dfs referral When there is no cached DFS referral of tcon->dfs_path, then reconnect to same share. Signed-off-by: Paulo Alcantara (SUSE) Cc: Signed-off-by: Steve French --- fs/cifs/connect.c | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/fs/cifs/connect.c b/fs/cifs/connect.c index f1a7e63ab58fff..d49c9e5c33faea 100644 --- a/fs/cifs/connect.c +++ b/fs/cifs/connect.c @@ -4144,7 +4144,8 @@ int cifs_tree_connect(const unsigned int xid, struct cifs_tcon *tcon, const stru if (!tree) return -ENOMEM; - if (!tcon->dfs_path) { + /* If it is not dfs or there was no cached dfs referral, then reconnect to same share */ + if (!tcon->dfs_path || dfs_cache_noreq_find(tcon->dfs_path + 1, &ref, &tl)) { if (tcon->ipc) { scnprintf(tree, MAX_TREE_SIZE, "\\\\%s\\IPC$", server->hostname); rc = ops->tree_connect(xid, tcon->ses, tree, tcon, nlsc); @@ -4154,9 +4155,6 @@ int cifs_tree_connect(const unsigned int xid, struct cifs_tcon *tcon, const stru goto out; } - rc = dfs_cache_noreq_find(tcon->dfs_path + 1, &ref, &tl); - if (rc) - goto out; isroot = ref.server_type == DFS_TYPE_ROOT; free_dfs_info_param(&ref); From 63f94e946fafcfc5080b4a4aec9770158268e4ee Mon Sep 17 00:00:00 2001 From: Steve French Date: Tue, 13 Jul 2021 19:40:33 -0500 Subject: [PATCH 459/714] cifs: fix missing null session check in mount Although it is unlikely to be have ended up with a null session pointer calling cifs_try_adding_channels in cifs_mount. Coverity correctly notes that we are already checking for it earlier (when we return from do_dfs_failover), so at a minimum to clarify the code we should make sure we also check for it when we exit the loop so we don't end up calling cifs_try_adding_channels or mount_setup_tlink with a null ses pointer. Addresses-Coverity: 1505608 ("Derefernce after null check") Reviewed-by: Paulo Alcantara (SUSE) Signed-off-by: Steve French --- fs/cifs/connect.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/cifs/connect.c b/fs/cifs/connect.c index d49c9e5c33faea..a1e87023211457 100644 --- a/fs/cifs/connect.c +++ b/fs/cifs/connect.c @@ -3508,7 +3508,7 @@ int cifs_mount(struct cifs_sb_info *cifs_sb, struct smb3_fs_context *ctx) rc = -ELOOP; } while (rc == -EREMOTE); - if (rc || !tcon) + if (rc || !tcon || !ses) goto error; kfree(ref_path); From 16dd9b8c31aee7ae074fa3ee36a797e9ba9f7e4f Mon Sep 17 00:00:00 2001 From: Shyam Prasad N Date: Wed, 14 Jul 2021 23:00:00 -0500 Subject: [PATCH 460/714] cifs: added WARN_ON for all the count decrements We have a few ref counters srv_count, ses_count and tc_count which we use for ref counting. Added a WARN_ON during the decrement of each of these counters to make sure that they don't go below their minimum values. Signed-off-by: Shyam Prasad N Signed-off-by: Steve French --- fs/cifs/connect.c | 9 +++++++++ fs/cifs/smb2ops.c | 2 ++ 2 files changed, 11 insertions(+) diff --git a/fs/cifs/connect.c b/fs/cifs/connect.c index a1e87023211457..6b6c3e341b4289 100644 --- a/fs/cifs/connect.c +++ b/fs/cifs/connect.c @@ -1295,6 +1295,9 @@ cifs_put_tcp_session(struct TCP_Server_Info *server, int from_reconnect) return; } + /* srv_count can never go negative */ + WARN_ON(server->srv_count < 0); + put_net(cifs_net_ns(server)); list_del_init(&server->tcp_ses_list); @@ -1654,6 +1657,9 @@ void cifs_put_smb_ses(struct cifs_ses *ses) } spin_unlock(&cifs_tcp_ses_lock); + /* ses_count can never go negative */ + WARN_ON(ses->ses_count < 0); + spin_lock(&GlobalMid_Lock); if (ses->status == CifsGood) ses->status = CifsExiting; @@ -2021,6 +2027,9 @@ cifs_put_tcon(struct cifs_tcon *tcon) return; } + /* tc_count can never go negative */ + WARN_ON(tcon->tc_count < 0); + if (tcon->use_witness) { int rc; diff --git a/fs/cifs/smb2ops.c b/fs/cifs/smb2ops.c index 232d528df230d3..ba3c58e1f72565 100644 --- a/fs/cifs/smb2ops.c +++ b/fs/cifs/smb2ops.c @@ -2910,6 +2910,8 @@ smb2_get_dfs_refer(const unsigned int xid, struct cifs_ses *ses, /* ipc tcons are not refcounted */ spin_lock(&cifs_tcp_ses_lock); tcon->tc_count--; + /* tc_count can never go negative */ + WARN_ON(tcon->tc_count < 0); spin_unlock(&cifs_tcp_ses_lock); } kfree(utf16_path); From 4511d7c8f440ab13520601141d67d13cb074620a Mon Sep 17 00:00:00 2001 From: Steve French Date: Wed, 14 Jul 2021 23:32:09 -0500 Subject: [PATCH 461/714] SMB3.1.1: fix mount failure to some servers when compression enabled When sending the compression context to some servers, they rejected the SMB3.1.1 negotiate protocol because they expect the compression context to have a data length of a multiple of 8. Reviewed-by: Shyam Prasad N Signed-off-by: Steve French --- fs/cifs/smb2pdu.h | 1 + 1 file changed, 1 insertion(+) diff --git a/fs/cifs/smb2pdu.h b/fs/cifs/smb2pdu.h index 4b27cb9105fd56..e9cac7970b66bd 100644 --- a/fs/cifs/smb2pdu.h +++ b/fs/cifs/smb2pdu.h @@ -394,6 +394,7 @@ struct smb2_compression_capabilities_context { __u16 Padding; __u32 Flags; __le16 CompressionAlgorithms[3]; + __u16 Pad; /* Some servers require pad to DataLen multiple of 8 */ /* Check if pad needed */ } __packed; From d936eb23874433caa3e3d841cfa16f5434b85dcf Mon Sep 17 00:00:00 2001 From: Linus Torvalds Date: Thu, 15 Jul 2021 18:05:31 -0700 Subject: [PATCH 462/714] Revert "Makefile: Enable -Wimplicit-fallthrough for Clang" MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This reverts commit b7eb335e26a9c7f258c96b3962c283c379d3ede0. It turns out that the problem with the clang -Wimplicit-fallthrough warning is not about the kernel source code, but about clang itself, and that the warning is unusable until clang fixes its broken ways. In particular, when you enable this warning for clang, you not only get warnings about implicit fallthroughs. You also get this: warning: fallthrough annotation in unreachable code [-Wimplicit-fallthrough] which is completely broken becasue it (a) doesn't even tell you where the problem is (seriously: no line numbers, no filename, no nothing). (b) is fundamentally broken anyway, because there are perfectly valid reasons to have a fallthrough statement even if it turns out that it can perhaps not be reached. In the kernel, an example of that second case is code in the scheduler: switch (state) { case cpuset: if (IS_ENABLED(CONFIG_CPUSETS)) { cpuset_cpus_allowed_fallback(p); state = possible; break; } fallthrough; case possible: where if CONFIG_CPUSETS is enabled you actually never hit the fallthrough case at all. But that in no way makes the fallthrough wrong. So the warning is completely broken, and enabling it for clang is a very bad idea. In the meantime, we can keep the gcc option enabled, and make the gcc build use -Wimplicit-fallthrough=5 which means that we will at least continue to require a proper fallthrough statement, and that gcc won't silently accept the magic comment versions. Because gcc does this all correctly, and while the odd "=5" part is kind of obscure, it's documented in [1]: "-Wimplicit-fallthrough=5 doesn’t recognize any comments as fallthrough comments, only attributes disable the warning" so if clang ever fixes its bad behavior we can try enabling it there again. Link: https://gcc.gnu.org/onlinedocs/gcc/Warning-Options.html [1] Cc: Kees Cook Cc: Gustavo A. R. Silva Cc: Nathan Chancellor Cc: Nick Desaulniers Signed-off-by: Linus Torvalds --- Makefile | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/Makefile b/Makefile index eaa692976851af..e97e7548315caf 100644 --- a/Makefile +++ b/Makefile @@ -797,6 +797,12 @@ KBUILD_CFLAGS += -Wno-gnu # source of a reference will be _MergedGlobals and not on of the whitelisted names. # See modpost pattern 2 KBUILD_CFLAGS += -mno-global-merge +else + +# Warn about unmarked fall-throughs in switch statement. +# Disabled for clang while comment to attribute conversion happens and +# https://github.com/ClangBuiltLinux/linux/issues/636 is discussed. +KBUILD_CFLAGS += $(call cc-option,-Wimplicit-fallthrough=5,) endif # These warnings generated too much noise in a regular build. @@ -977,9 +983,6 @@ NOSTDINC_FLAGS += -nostdinc -isystem $(shell $(CC) -print-file-name=include) # warn about C99 declaration after statement KBUILD_CFLAGS += -Wdeclaration-after-statement -# Warn about unmarked fall-throughs in switch statement. -KBUILD_CFLAGS += $(call cc-option,-Wimplicit-fallthrough=5,$(call cc-option,-Wimplicit-fallthrough,)) - # Variable Length Arrays (VLAs) should not be used anywhere in the kernel KBUILD_CFLAGS += -Wvla From 2f53d15cf95824ed320abed3c33759b8b21aca15 Mon Sep 17 00:00:00 2001 From: Xianting Tian Date: Wed, 30 Jun 2021 10:33:36 -0400 Subject: [PATCH 463/714] zonefs: remove redundant null bio check bio_alloc() with __GFP_DIRECT_RECLAIM, which is included in GFP_NOFS, never fails, see comments in bio_alloc_bioset(). Signed-off-by: Xianting Tian Signed-off-by: Damien Le Moal --- fs/zonefs/super.c | 3 --- 1 file changed, 3 deletions(-) diff --git a/fs/zonefs/super.c b/fs/zonefs/super.c index dbf03635869c30..70055d486bf70b 100644 --- a/fs/zonefs/super.c +++ b/fs/zonefs/super.c @@ -705,9 +705,6 @@ static ssize_t zonefs_file_dio_append(struct kiocb *iocb, struct iov_iter *from) return 0; bio = bio_alloc(GFP_NOFS, nr_pages); - if (!bio) - return -ENOMEM; - bio_set_dev(bio, bdev); bio->bi_iter.bi_sector = zi->i_zsector; bio->bi_write_hint = iocb->ki_hint; From cdc3363065aba2711e51019b3d5787f044f8a133 Mon Sep 17 00:00:00 2001 From: Paulo Alcantara Date: Thu, 15 Jul 2021 21:53:53 -0300 Subject: [PATCH 464/714] cifs: do not share tcp sessions of dfs connections Make sure that we do not share tcp sessions of dfs mounts when mounting regular shares that connect to same server. DFS connections rely on a single instance of tcp in order to do failover properly in cifs_reconnect(). Signed-off-by: Paulo Alcantara (SUSE) Signed-off-by: Steve French --- fs/cifs/cifsglob.h | 3 +++ fs/cifs/connect.c | 38 +++++++++++++++++++++++++++++++++++--- 2 files changed, 38 insertions(+), 3 deletions(-) diff --git a/fs/cifs/cifsglob.h b/fs/cifs/cifsglob.h index ade7080f7eaa89..c0bfc2f01030df 100644 --- a/fs/cifs/cifsglob.h +++ b/fs/cifs/cifsglob.h @@ -693,6 +693,9 @@ struct TCP_Server_Info { bool use_swn_dstaddr; struct sockaddr_storage swn_dstaddr; #endif +#ifdef CONFIG_CIFS_DFS_UPCALL + bool is_dfs_conn; /* if a dfs connection */ +#endif }; struct cifs_credits { diff --git a/fs/cifs/connect.c b/fs/cifs/connect.c index 6b6c3e341b4289..1b04d6ec14ddac 100644 --- a/fs/cifs/connect.c +++ b/fs/cifs/connect.c @@ -1268,6 +1268,16 @@ cifs_find_tcp_session(struct smb3_fs_context *ctx) spin_lock(&cifs_tcp_ses_lock); list_for_each_entry(server, &cifs_tcp_ses_list, tcp_ses_list) { +#ifdef CONFIG_CIFS_DFS_UPCALL + /* + * DFS failover implementation in cifs_reconnect() requires unique tcp sessions for + * DFS connections to do failover properly, so avoid sharing them with regular + * shares or even links that may connect to same server but having completely + * different failover targets. + */ + if (server->is_dfs_conn) + continue; +#endif /* * Skip ses channels since they're only handled in lower layers * (e.g. cifs_send_recv). @@ -2968,6 +2978,23 @@ static int mount_setup_tlink(struct cifs_sb_info *cifs_sb, struct cifs_ses *ses, } #ifdef CONFIG_CIFS_DFS_UPCALL +static int mount_get_dfs_conns(struct smb3_fs_context *ctx, struct cifs_sb_info *cifs_sb, + unsigned int *xid, struct TCP_Server_Info **nserver, + struct cifs_ses **nses, struct cifs_tcon **ntcon) +{ + int rc; + + ctx->nosharesock = true; + rc = mount_get_conns(ctx, cifs_sb, xid, nserver, nses, ntcon); + if (*nserver) { + cifs_dbg(FYI, "%s: marking tcp session as a dfs connection\n", __func__); + spin_lock(&cifs_tcp_ses_lock); + (*nserver)->is_dfs_conn = true; + spin_unlock(&cifs_tcp_ses_lock); + } + return rc; +} + /* * cifs_build_path_to_root returns full path to root when we do not have an * existing connection (tcon) @@ -3163,7 +3190,7 @@ static int do_dfs_failover(const char *path, const char *full_path, struct cifs_ tmp_ctx.prepath); mount_put_conns(cifs_sb, *xid, *server, *ses, *tcon); - rc = mount_get_conns(&tmp_ctx, cifs_sb, xid, server, ses, tcon); + rc = mount_get_dfs_conns(&tmp_ctx, cifs_sb, xid, server, ses, tcon); if (!rc || (*server && *ses)) { /* * We were able to connect to new target server. Update current context with @@ -3462,7 +3489,12 @@ int cifs_mount(struct cifs_sb_info *cifs_sb, struct smb3_fs_context *ctx) goto error; } - ctx->nosharesock = true; + mount_put_conns(cifs_sb, xid, server, ses, tcon); + /* + * Ignore error check here because we may failover to other targets from cached a + * referral. + */ + (void)mount_get_dfs_conns(ctx, cifs_sb, &xid, &server, &ses, &tcon); /* Get path of DFS root */ ref_path = build_unc_path_to_root(ctx, cifs_sb, false); @@ -3491,7 +3523,7 @@ int cifs_mount(struct cifs_sb_info *cifs_sb, struct smb3_fs_context *ctx) /* Connect to new DFS target only if we were redirected */ if (oldmnt != cifs_sb->ctx->mount_options) { mount_put_conns(cifs_sb, xid, server, ses, tcon); - rc = mount_get_conns(ctx, cifs_sb, &xid, &server, &ses, &tcon); + rc = mount_get_dfs_conns(ctx, cifs_sb, &xid, &server, &ses, &tcon); } if (rc && !server && !ses) { /* Failed to connect. Try to connect to other targets in the referral. */ From fa5239f2af983ffdf08395a542a7d6356b6222c5 Mon Sep 17 00:00:00 2001 From: Felix Kuehling Date: Thu, 15 Jul 2021 15:05:37 -0400 Subject: [PATCH 465/714] drm/amdgpu: workaround failed COW checks for Thunk VMAs MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit KFD Thunk maps invisible VRAM BOs with PROT_NONE, MAP_PRIVATE. is_cow_mapping returns true for these mappings, which causes mmap to fail in ttm_bo_mmap_obj. As a workaround, clear VM_MAYWRITE for PROT_NONE-COW mappings. This should prevent the mapping from ever becoming writable and makes is_cow_mapping(vm_flags) false. Fixes: f91142c62161 ("drm/ttm: nuke VM_MIXEDMAP on BO mappings v3") Suggested-by: Daniel Vetter Tested-by: Felix Kuehling Signed-off-by: Felix Kuehling Reviewed-by: Christian König Link: https://patchwork.freedesktop.org/patch/msgid/20210715190537.585456-1-Felix.Kuehling@amd.com Signed-off-by: Christian König --- drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c index b3404c43a9111e..9f952b7fc19736 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c @@ -255,6 +255,15 @@ static int amdgpu_gem_object_mmap(struct drm_gem_object *obj, struct vm_area_str if (bo->flags & AMDGPU_GEM_CREATE_NO_CPU_ACCESS) return -EPERM; + /* Workaround for Thunk bug creating PROT_NONE,MAP_PRIVATE mappings + * for debugger access to invisible VRAM. Should have used MAP_SHARED + * instead. Clearing VM_MAYWRITE prevents the mapping from ever + * becoming writable and makes is_cow_mapping(vm_flags) false. + */ + if (is_cow_mapping(vma->vm_flags) && + !(vma->vm_flags & (VM_READ | VM_WRITE | VM_EXEC))) + vma->vm_flags &= ~VM_MAYWRITE; + return drm_gem_ttm_mmap(obj, vma); } From 40ac971eab89330d6153e7721e88acd2d98833f9 Mon Sep 17 00:00:00 2001 From: Roman Skakun Date: Fri, 16 Jul 2021 11:39:34 +0300 Subject: [PATCH 466/714] dma-mapping: handle vmalloc addresses in dma_common_{mmap,get_sgtable} xen-swiotlb can use vmalloc backed addresses for dma coherent allocations and uses the common helpers. Properly handle them to unbreak Xen on ARM platforms. Fixes: 1b65c4e5a9af ("swiotlb-xen: use xen_alloc/free_coherent_pages") Signed-off-by: Roman Skakun Reviewed-by: Andrii Anisov [hch: split the patch, renamed the helpers] Signed-off-by: Christoph Hellwig --- kernel/dma/ops_helpers.c | 12 ++++++++++-- 1 file changed, 10 insertions(+), 2 deletions(-) diff --git a/kernel/dma/ops_helpers.c b/kernel/dma/ops_helpers.c index 910ae69cae7774..af4a6ef48ce04a 100644 --- a/kernel/dma/ops_helpers.c +++ b/kernel/dma/ops_helpers.c @@ -5,6 +5,13 @@ */ #include +static struct page *dma_common_vaddr_to_page(void *cpu_addr) +{ + if (is_vmalloc_addr(cpu_addr)) + return vmalloc_to_page(cpu_addr); + return virt_to_page(cpu_addr); +} + /* * Create scatter-list for the already allocated DMA buffer. */ @@ -12,7 +19,7 @@ int dma_common_get_sgtable(struct device *dev, struct sg_table *sgt, void *cpu_addr, dma_addr_t dma_addr, size_t size, unsigned long attrs) { - struct page *page = virt_to_page(cpu_addr); + struct page *page = dma_common_vaddr_to_page(cpu_addr); int ret; ret = sg_alloc_table(sgt, 1, GFP_KERNEL); @@ -32,6 +39,7 @@ int dma_common_mmap(struct device *dev, struct vm_area_struct *vma, unsigned long user_count = vma_pages(vma); unsigned long count = PAGE_ALIGN(size) >> PAGE_SHIFT; unsigned long off = vma->vm_pgoff; + struct page *page = dma_common_vaddr_to_page(cpu_addr); int ret = -ENXIO; vma->vm_page_prot = dma_pgprot(dev, vma->vm_page_prot, attrs); @@ -43,7 +51,7 @@ int dma_common_mmap(struct device *dev, struct vm_area_struct *vma, return -ENXIO; return remap_pfn_range(vma, vma->vm_start, - page_to_pfn(virt_to_page(cpu_addr)) + vma->vm_pgoff, + page_to_pfn(page) + vma->vm_pgoff, user_count << PAGE_SHIFT, vma->vm_page_prot); #else return -ENXIO; From f99986c0fcad8e1d7d842e9a636f55bcc6748da5 Mon Sep 17 00:00:00 2001 From: Srinivas Kandagatla Date: Fri, 16 Jul 2021 11:57:35 +0100 Subject: [PATCH 467/714] ASoC: codecs: wcd938x: setup irq during component bind SoundWire registers are only accessable after sdw components are succesfully binded. Setup irqs at that point instead of doing at probe. Signed-off-by: Srinivas Kandagatla Link: https://lore.kernel.org/r/20210716105735.6073-1-srinivas.kandagatla@linaro.org Signed-off-by: Mark Brown --- sound/soc/codecs/wcd938x.c | 18 +++++++++--------- 1 file changed, 9 insertions(+), 9 deletions(-) diff --git a/sound/soc/codecs/wcd938x.c b/sound/soc/codecs/wcd938x.c index 78b76eceff8fa2..2fcc97370be2bb 100644 --- a/sound/soc/codecs/wcd938x.c +++ b/sound/soc/codecs/wcd938x.c @@ -3317,13 +3317,6 @@ static int wcd938x_soc_codec_probe(struct snd_soc_component *component) (WCD938X_DIGITAL_INTR_LEVEL_0 + i), 0); } - ret = wcd938x_irq_init(wcd938x, component->dev); - if (ret) { - dev_err(component->dev, "%s: IRQ init failed: %d\n", - __func__, ret); - return ret; - } - wcd938x->hphr_pdm_wd_int = regmap_irq_get_virq(wcd938x->irq_chip, WCD938X_IRQ_HPHR_PDM_WD_INT); wcd938x->hphl_pdm_wd_int = regmap_irq_get_virq(wcd938x->irq_chip, @@ -3553,7 +3546,6 @@ static int wcd938x_bind(struct device *dev) } wcd938x->sdw_priv[AIF1_PB] = dev_get_drvdata(wcd938x->rxdev); wcd938x->sdw_priv[AIF1_PB]->wcd938x = wcd938x; - wcd938x->sdw_priv[AIF1_PB]->slave_irq = wcd938x->virq; wcd938x->txdev = wcd938x_sdw_device_get(wcd938x->txnode); if (!wcd938x->txdev) { @@ -3562,7 +3554,6 @@ static int wcd938x_bind(struct device *dev) } wcd938x->sdw_priv[AIF1_CAP] = dev_get_drvdata(wcd938x->txdev); wcd938x->sdw_priv[AIF1_CAP]->wcd938x = wcd938x; - wcd938x->sdw_priv[AIF1_CAP]->slave_irq = wcd938x->virq; wcd938x->tx_sdw_dev = dev_to_sdw_dev(wcd938x->txdev); if (!wcd938x->tx_sdw_dev) { dev_err(dev, "could not get txslave with matching of dev\n"); @@ -3595,6 +3586,15 @@ static int wcd938x_bind(struct device *dev) return PTR_ERR(wcd938x->regmap); } + ret = wcd938x_irq_init(wcd938x, dev); + if (ret) { + dev_err(dev, "%s: IRQ init failed: %d\n", __func__, ret); + return ret; + } + + wcd938x->sdw_priv[AIF1_PB]->slave_irq = wcd938x->virq; + wcd938x->sdw_priv[AIF1_CAP]->slave_irq = wcd938x->virq; + ret = wcd938x_set_micbias_data(wcd938x); if (ret < 0) { dev_err(dev, "%s: bad micbias pdata\n", __func__); From 59dd33f82dc0975c55d3d46801e7ca45532d7673 Mon Sep 17 00:00:00 2001 From: Vijendar Mukunda Date: Fri, 16 Jul 2021 18:00:12 +0530 Subject: [PATCH 468/714] ASoC: soc-pcm: add a flag to reverse the stop sequence On stream stop, currently CPU DAI stop sequence invoked first followed by DMA. For Few platforms, it is required to stop the DMA first before stopping CPU DAI. Introduced new flag in dai_link structure for reordering stop sequence. Based on flag check, ASoC core will re-order the stop sequence. Fixes: 4378f1fbe92405 ("ASoC: soc-pcm: Use different sequence for start/stop trigger") Signed-off-by: Vijendar Mukunda Link: https://lore.kernel.org/r/20210716123015.15697-1-vijendar.mukunda@amd.com Signed-off-by: Mark Brown --- include/sound/soc.h | 6 ++++++ sound/soc/soc-pcm.c | 22 ++++++++++++++++------ 2 files changed, 22 insertions(+), 6 deletions(-) diff --git a/include/sound/soc.h b/include/sound/soc.h index 675849d07284af..8e6dd8a257c567 100644 --- a/include/sound/soc.h +++ b/include/sound/soc.h @@ -712,6 +712,12 @@ struct snd_soc_dai_link { /* Do not create a PCM for this DAI link (Backend link) */ unsigned int ignore:1; + /* This flag will reorder stop sequence. By enabling this flag + * DMA controller stop sequence will be invoked first followed by + * CPU DAI driver stop sequence + */ + unsigned int stop_dma_first:1; + #ifdef CONFIG_SND_SOC_TOPOLOGY struct snd_soc_dobj dobj; /* For topology */ #endif diff --git a/sound/soc/soc-pcm.c b/sound/soc/soc-pcm.c index 46513bb9790447..d1c570ca21ea78 100644 --- a/sound/soc/soc-pcm.c +++ b/sound/soc/soc-pcm.c @@ -1015,6 +1015,7 @@ static int soc_pcm_hw_params(struct snd_pcm_substream *substream, static int soc_pcm_trigger(struct snd_pcm_substream *substream, int cmd) { + struct snd_soc_pcm_runtime *rtd = asoc_substream_to_rtd(substream); int ret = -EINVAL, _ret = 0; int rollback = 0; @@ -1055,14 +1056,23 @@ static int soc_pcm_trigger(struct snd_pcm_substream *substream, int cmd) case SNDRV_PCM_TRIGGER_STOP: case SNDRV_PCM_TRIGGER_SUSPEND: case SNDRV_PCM_TRIGGER_PAUSE_PUSH: - ret = snd_soc_pcm_dai_trigger(substream, cmd, rollback); - if (ret < 0) - break; + if (rtd->dai_link->stop_dma_first) { + ret = snd_soc_pcm_component_trigger(substream, cmd, rollback); + if (ret < 0) + break; - ret = snd_soc_pcm_component_trigger(substream, cmd, rollback); - if (ret < 0) - break; + ret = snd_soc_pcm_dai_trigger(substream, cmd, rollback); + if (ret < 0) + break; + } else { + ret = snd_soc_pcm_dai_trigger(substream, cmd, rollback); + if (ret < 0) + break; + ret = snd_soc_pcm_component_trigger(substream, cmd, rollback); + if (ret < 0) + break; + } ret = snd_soc_link_trigger(substream, cmd, rollback); break; } From 7883490cba002121a5870e786a1dc0acce5e1caf Mon Sep 17 00:00:00 2001 From: Vijendar Mukunda Date: Fri, 16 Jul 2021 18:00:13 +0530 Subject: [PATCH 469/714] ASoC: amd: reverse stop sequence for stoneyridge platform For Stoneyridge platform, it is required to invoke DMA driver stop first rather than invoking DWC I2S controller stop. Enable dai_link structure stop_dma_fist flag to reverse the stop sequence. Signed-off-by: Vijendar Mukunda Link: https://lore.kernel.org/r/20210716123015.15697-2-vijendar.mukunda@amd.com Signed-off-by: Mark Brown --- sound/soc/amd/acp-da7219-max98357a.c | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/sound/soc/amd/acp-da7219-max98357a.c b/sound/soc/amd/acp-da7219-max98357a.c index 84e3906abd4f33..9449fb40a956bd 100644 --- a/sound/soc/amd/acp-da7219-max98357a.c +++ b/sound/soc/amd/acp-da7219-max98357a.c @@ -576,6 +576,7 @@ static struct snd_soc_dai_link cz_dai_5682_98357[] = { | SND_SOC_DAIFMT_CBM_CFM, .init = cz_rt5682_init, .dpcm_playback = 1, + .stop_dma_first = 1, .ops = &cz_rt5682_play_ops, SND_SOC_DAILINK_REG(designware1, rt5682, platform), }, @@ -585,6 +586,7 @@ static struct snd_soc_dai_link cz_dai_5682_98357[] = { .dai_fmt = SND_SOC_DAIFMT_I2S | SND_SOC_DAIFMT_NB_NF | SND_SOC_DAIFMT_CBM_CFM, .dpcm_capture = 1, + .stop_dma_first = 1, .ops = &cz_rt5682_cap_ops, SND_SOC_DAILINK_REG(designware2, rt5682, platform), }, @@ -594,6 +596,7 @@ static struct snd_soc_dai_link cz_dai_5682_98357[] = { .dai_fmt = SND_SOC_DAIFMT_I2S | SND_SOC_DAIFMT_NB_NF | SND_SOC_DAIFMT_CBM_CFM, .dpcm_playback = 1, + .stop_dma_first = 1, .ops = &cz_rt5682_max_play_ops, SND_SOC_DAILINK_REG(designware3, mx, platform), }, @@ -604,6 +607,7 @@ static struct snd_soc_dai_link cz_dai_5682_98357[] = { .dai_fmt = SND_SOC_DAIFMT_I2S | SND_SOC_DAIFMT_NB_NF | SND_SOC_DAIFMT_CBM_CFM, .dpcm_capture = 1, + .stop_dma_first = 1, .ops = &cz_rt5682_dmic0_cap_ops, SND_SOC_DAILINK_REG(designware3, adau, platform), }, @@ -614,6 +618,7 @@ static struct snd_soc_dai_link cz_dai_5682_98357[] = { .dai_fmt = SND_SOC_DAIFMT_I2S | SND_SOC_DAIFMT_NB_NF | SND_SOC_DAIFMT_CBM_CFM, .dpcm_capture = 1, + .stop_dma_first = 1, .ops = &cz_rt5682_dmic1_cap_ops, SND_SOC_DAILINK_REG(designware2, adau, platform), }, From 6a503e1c455316fd0bfd8188c0a62cce7c5525ca Mon Sep 17 00:00:00 2001 From: Oder Chiou Date: Fri, 16 Jul 2021 16:58:53 +0800 Subject: [PATCH 470/714] ASoC: rt5682: Fix the issue of garbled recording after powerd_dbus_suspend While using the DMIC recording, the garbled data will be captured by the DMIC. It is caused by the critical power of PLL closed in the jack detect function. Signed-off-by: Oder Chiou Link: https://lore.kernel.org/r/20210716085853.20170-1-oder_chiou@realtek.com Signed-off-by: Mark Brown --- sound/soc/codecs/rt5682.c | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/sound/soc/codecs/rt5682.c b/sound/soc/codecs/rt5682.c index e4c91571abaefb..abcd6f48378880 100644 --- a/sound/soc/codecs/rt5682.c +++ b/sound/soc/codecs/rt5682.c @@ -973,10 +973,14 @@ int rt5682_headset_detect(struct snd_soc_component *component, int jack_insert) rt5682_enable_push_button_irq(component, false); snd_soc_component_update_bits(component, RT5682_CBJ_CTRL_1, RT5682_TRIG_JD_MASK, RT5682_TRIG_JD_LOW); - if (!snd_soc_dapm_get_pin_status(dapm, "MICBIAS")) + if (!snd_soc_dapm_get_pin_status(dapm, "MICBIAS") && + !snd_soc_dapm_get_pin_status(dapm, "PLL1") && + !snd_soc_dapm_get_pin_status(dapm, "PLL2B")) snd_soc_component_update_bits(component, RT5682_PWR_ANLG_1, RT5682_PWR_MB, 0); - if (!snd_soc_dapm_get_pin_status(dapm, "Vref2")) + if (!snd_soc_dapm_get_pin_status(dapm, "Vref2") && + !snd_soc_dapm_get_pin_status(dapm, "PLL1") && + !snd_soc_dapm_get_pin_status(dapm, "PLL2B")) snd_soc_component_update_bits(component, RT5682_PWR_ANLG_1, RT5682_PWR_VREF2, 0); snd_soc_component_update_bits(component, RT5682_PWR_ANLG_3, From 0ccfd1ba84a4503b509250941af149e9ebd605ca Mon Sep 17 00:00:00 2001 From: Yoshitaka Ikeda Date: Fri, 16 Jul 2021 14:33:12 +0000 Subject: [PATCH 471/714] spi: spi-cadence-quadspi: Revert "Fix division by zero warning" Revert to change to a better code. This reverts commit 55cef88bbf12f3bfbe5c2379a8868a034707e755. Signed-off-by: Yoshitaka Ikeda Link: https://lore.kernel.org/r/bd30bdb4-07c4-f713-5648-01c898d51f1b@nskint.co.jp Signed-off-by: Mark Brown --- drivers/spi/spi-cadence-quadspi.c | 10 ++++------ 1 file changed, 4 insertions(+), 6 deletions(-) diff --git a/drivers/spi/spi-cadence-quadspi.c b/drivers/spi/spi-cadence-quadspi.c index 13d1f0ce618eb8..7a00346ff9b92b 100644 --- a/drivers/spi/spi-cadence-quadspi.c +++ b/drivers/spi/spi-cadence-quadspi.c @@ -307,13 +307,11 @@ static unsigned int cqspi_calc_rdreg(struct cqspi_flash_pdata *f_pdata) static unsigned int cqspi_calc_dummy(const struct spi_mem_op *op, bool dtr) { - unsigned int dummy_clk = 0; + unsigned int dummy_clk; - if (op->dummy.buswidth && op->dummy.nbytes) { - dummy_clk = op->dummy.nbytes * (8 / op->dummy.buswidth); - if (dtr) - dummy_clk /= 2; - } + dummy_clk = op->dummy.nbytes * (8 / op->dummy.buswidth); + if (dtr) + dummy_clk /= 2; return dummy_clk; } From 0e85ee897858b1c7a5de53f496d016899d9639c5 Mon Sep 17 00:00:00 2001 From: Yoshitaka Ikeda Date: Fri, 16 Jul 2021 14:35:13 +0000 Subject: [PATCH 472/714] spi: spi-cadence-quadspi: Fix division by zero warning Fix below division by zero warning: - The reason for dividing by zero is because the dummy bus width is zero, but if the dummy n bytes is zero, it indicates that there is no data transfer, so we can just return zero without doing any calculations. [ 0.795337] Division by zero in kernel. : [ 0.834051] [<807fd40c>] (__div0) from [<804e1acc>] (Ldiv0+0x8/0x10) [ 0.839097] [<805f0710>] (cqspi_exec_mem_op) from [<805edb4c>] (spi_mem_exec_op+0x3b0/0x3f8) Fixes: 7512eaf54190 ("spi: cadence-quadspi: Fix dummy cycle calculation when buswidth > 1") Signed-off-by: Yoshitaka Ikeda Reviewed-by: Pratyush Yadav Link: https://lore.kernel.org/r/92eea403-9b21-2488-9cc1-664bee760c5e@nskint.co.jp Signed-off-by: Mark Brown --- drivers/spi/spi-cadence-quadspi.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/spi/spi-cadence-quadspi.c b/drivers/spi/spi-cadence-quadspi.c index 7a00346ff9b92b..d62d69dd72b9d0 100644 --- a/drivers/spi/spi-cadence-quadspi.c +++ b/drivers/spi/spi-cadence-quadspi.c @@ -309,6 +309,9 @@ static unsigned int cqspi_calc_dummy(const struct spi_mem_op *op, bool dtr) { unsigned int dummy_clk; + if (!op->dummy.nbytes) + return 0; + dummy_clk = op->dummy.nbytes * (8 / op->dummy.buswidth); if (dtr) dummy_clk /= 2; From 674a9f1f6815849bfb5bf385e7da8fc198aaaba9 Mon Sep 17 00:00:00 2001 From: Michal Suchanek Date: Thu, 8 Jul 2021 11:46:54 +0200 Subject: [PATCH 473/714] efi/tpm: Differentiate missing and invalid final event log table. Missing TPM final event log table is not a firmware bug. Clearly if providing event log in the old format makes the final event log invalid it should not be provided at least in that case. Fixes: b4f1874c6216 ("tpm: check event log version before reading final events") Signed-off-by: Michal Suchanek Reviewed-by: Jarkko Sakkinen Signed-off-by: Ard Biesheuvel --- drivers/firmware/efi/tpm.c | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/drivers/firmware/efi/tpm.c b/drivers/firmware/efi/tpm.c index c1955d320fecd6..8f665678e9e398 100644 --- a/drivers/firmware/efi/tpm.c +++ b/drivers/firmware/efi/tpm.c @@ -62,9 +62,11 @@ int __init efi_tpm_eventlog_init(void) tbl_size = sizeof(*log_tbl) + log_tbl->size; memblock_reserve(efi.tpm_log, tbl_size); - if (efi.tpm_final_log == EFI_INVALID_TABLE_ADDR || - log_tbl->version != EFI_TCG2_EVENT_LOG_FORMAT_TCG_2) { - pr_warn(FW_BUG "TPM Final Events table missing or invalid\n"); + if (efi.tpm_final_log == EFI_INVALID_TABLE_ADDR) { + pr_info("TPM Final Events table not present\n"); + goto out; + } else if (log_tbl->version != EFI_TCG2_EVENT_LOG_FORMAT_TCG_2) { + pr_warn(FW_BUG "TPM Final Events table invalid\n"); goto out; } From 2bab693a608bdf614b9fcd44083c5100f34b9f77 Mon Sep 17 00:00:00 2001 From: Marc Zyngier Date: Tue, 13 Jul 2021 19:43:26 +0100 Subject: [PATCH 474/714] firmware/efi: Tell memblock about EFI iomem reservations kexec_load_file() relies on the memblock infrastructure to avoid stamping over regions of memory that are essential to the survival of the system. However, nobody seems to agree how to flag these regions as reserved, and (for example) EFI only publishes its reservations in /proc/iomem for the benefit of the traditional, userspace based kexec tool. On arm64 platforms with GICv3, this can result in the payload being placed at the location of the LPI tables. Shock, horror! Let's augment the EFI reservation code with a memblock_reserve() call, protecting our dear tables from the secondary kernel invasion. Reported-by: Moritz Fischer Tested-by: Moritz Fischer Signed-off-by: Marc Zyngier Cc: stable@vger.kernel.org Cc: Ard Biesheuvel Cc: James Morse Cc: Catalin Marinas Cc: Will Deacon Signed-off-by: Ard Biesheuvel --- drivers/firmware/efi/efi.c | 13 ++++++++++++- 1 file changed, 12 insertions(+), 1 deletion(-) diff --git a/drivers/firmware/efi/efi.c b/drivers/firmware/efi/efi.c index 4b7ee3fa9224ff..847f33ffc4aede 100644 --- a/drivers/firmware/efi/efi.c +++ b/drivers/firmware/efi/efi.c @@ -896,6 +896,7 @@ static int __init efi_memreserve_map_root(void) static int efi_mem_reserve_iomem(phys_addr_t addr, u64 size) { struct resource *res, *parent; + int ret; res = kzalloc(sizeof(struct resource), GFP_ATOMIC); if (!res) @@ -908,7 +909,17 @@ static int efi_mem_reserve_iomem(phys_addr_t addr, u64 size) /* we expect a conflict with a 'System RAM' region */ parent = request_resource_conflict(&iomem_resource, res); - return parent ? request_resource(parent, res) : 0; + ret = parent ? request_resource(parent, res) : 0; + + /* + * Given that efi_mem_reserve_iomem() can be called at any + * time, only call memblock_reserve() if the architecture + * keeps the infrastructure around. + */ + if (IS_ENABLED(CONFIG_ARCH_KEEP_MEMBLOCK) && !ret) + memblock_reserve(addr, size); + + return ret; } int __ref efi_mem_reserve_persistent(phys_addr_t addr, u64 size) From 947228cb9f1a2c69a5da5279c48f02bb4f49ce32 Mon Sep 17 00:00:00 2001 From: Atish Patra Date: Fri, 2 Jul 2021 12:10:44 -0700 Subject: [PATCH 475/714] efi/libstub: Fix the efi_load_initrd function description The soft_limit and hard_limit in the function efi_load_initrd describes the preferred and max address of initrd loading location respectively. However, the description wrongly describes it as the size of the allocated memory. Fix the function description. Signed-off-by: Atish Patra Signed-off-by: Ard Biesheuvel --- drivers/firmware/efi/libstub/efi-stub-helper.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/firmware/efi/libstub/efi-stub-helper.c b/drivers/firmware/efi/libstub/efi-stub-helper.c index aa8da0a4982941..ae87dded989db2 100644 --- a/drivers/firmware/efi/libstub/efi-stub-helper.c +++ b/drivers/firmware/efi/libstub/efi-stub-helper.c @@ -630,8 +630,8 @@ efi_status_t efi_load_initrd_cmdline(efi_loaded_image_t *image, * @image: EFI loaded image protocol * @load_addr: pointer to loaded initrd * @load_size: size of loaded initrd - * @soft_limit: preferred size of allocated memory for loading the initrd - * @hard_limit: minimum size of allocated memory + * @soft_limit: preferred address for loading the initrd + * @hard_limit: upper limit address for loading the initrd * * Return: status code */ From 937654ce497fb6e977a8c52baee5f7d9616302d9 Mon Sep 17 00:00:00 2001 From: Riccardo Mancini Date: Thu, 15 Jul 2021 18:07:24 +0200 Subject: [PATCH 476/714] perf test bpf: Free obj_buf ASan reports some memory leaks when running: # perf test "42: BPF filter" The first of these leaks is caused by obj_buf never being deallocated in __test__bpf. This patch adds the missing free. Signed-off-by: Riccardo Mancini Fixes: ba1fae431e74bb42 ("perf test: Add 'perf test BPF'") Cc: Ian Rogers Cc: Jiri Olsa Cc: Mark Rutland Cc: Namhyung Kim Cc: Peter Zijlstra Cc: Wang Nan Link: http://lore.kernel.org/lkml/60f3ca935fe6672e7e866276ce6264c9e26e4c87.1626343282.git.rickyman7@gmail.com [ Added missing stdlib.h include ] Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/tests/bpf.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/tools/perf/tests/bpf.c b/tools/perf/tests/bpf.c index 33bda9c2654235..dbf5f5215abeef 100644 --- a/tools/perf/tests/bpf.c +++ b/tools/perf/tests/bpf.c @@ -1,6 +1,7 @@ // SPDX-License-Identifier: GPL-2.0 #include #include +#include #include #include #include @@ -276,6 +277,7 @@ static int __test__bpf(int idx) } out: + free(obj_buf); bpf__clear(); return ret; } From 9cb2ff11171264d10be7ea9e31d9ee5d49ba84a5 Mon Sep 17 00:00:00 2001 From: Apurva Nandan Date: Tue, 13 Jul 2021 12:57:41 +0000 Subject: [PATCH 477/714] spi: cadence-quadspi: Disable Auto-HW polling cadence-quadspi has a builtin Auto-HW polling funtionality using which it keep tracks of completion of write operations. When Auto-HW polling is enabled, it automatically initiates status register read operation, until the flash clears its busy bit. cadence-quadspi controller doesn't allow an address phase when auto-polling the busy bit on the status register. Unlike SPI NOR flashes, SPI NAND flashes do require the address of status register when polling the busy bit using the read register operation. As Auto-HW polling is enabled by default, cadence-quadspi returns a timeout for every write operation after an indefinite amount of polling on SPI NAND flashes. Disable Auto-HW polling completely as the spi-nor core, spinand core, etc. take care of polling the busy bit on their own. Signed-off-by: Apurva Nandan Link: https://lore.kernel.org/r/20210713125743.1540-2-a-nandan@ti.com Signed-off-by: Mark Brown --- drivers/spi/spi-cadence-quadspi.c | 27 ++++++++++++++------------- 1 file changed, 14 insertions(+), 13 deletions(-) diff --git a/drivers/spi/spi-cadence-quadspi.c b/drivers/spi/spi-cadence-quadspi.c index d62d69dd72b9d0..a2de23516553c1 100644 --- a/drivers/spi/spi-cadence-quadspi.c +++ b/drivers/spi/spi-cadence-quadspi.c @@ -800,19 +800,20 @@ static int cqspi_write_setup(struct cqspi_flash_pdata *f_pdata, reg = cqspi_calc_rdreg(f_pdata); writel(reg, reg_base + CQSPI_REG_RD_INSTR); - if (f_pdata->dtr) { - /* - * Some flashes like the cypress Semper flash expect a 4-byte - * dummy address with the Read SR command in DTR mode, but this - * controller does not support sending address with the Read SR - * command. So, disable write completion polling on the - * controller's side. spi-nor will take care of polling the - * status register. - */ - reg = readl(reg_base + CQSPI_REG_WR_COMPLETION_CTRL); - reg |= CQSPI_REG_WR_DISABLE_AUTO_POLL; - writel(reg, reg_base + CQSPI_REG_WR_COMPLETION_CTRL); - } + /* + * SPI NAND flashes require the address of the status register to be + * passed in the Read SR command. Also, some SPI NOR flashes like the + * cypress Semper flash expect a 4-byte dummy address in the Read SR + * command in DTR mode. + * + * But this controller does not support address phase in the Read SR + * command when doing auto-HW polling. So, disable write completion + * polling on the controller's side. spinand and spi-nor will take + * care of polling the status register. + */ + reg = readl(reg_base + CQSPI_REG_WR_COMPLETION_CTRL); + reg |= CQSPI_REG_WR_DISABLE_AUTO_POLL; + writel(reg, reg_base + CQSPI_REG_WR_COMPLETION_CTRL); reg = readl(reg_base + CQSPI_REG_SIZE); reg &= ~CQSPI_REG_SIZE_ADDRESS_MASK; From 50d8d7e19c4398da74d028f367754e73547b078b Mon Sep 17 00:00:00 2001 From: Geert Uytterhoeven Date: Wed, 14 Jul 2021 12:19:36 +0200 Subject: [PATCH 478/714] dt-bindings: display: renesas,du: Make resets optional on R-Car H1 The "resets" property is not present on R-Car Gen1 SoCs. Supporting it would require migrating from renesas,cpg-clocks to renesas,cpg-mssr. Reflect this in the DT bindings by removing the global "required: resets". All SoCs that do have "resets" properties already have SoC-specific rules making it required. Fixes: 99d66127fad25ebb ("dt-bindings: display: renesas,du: Convert binding to YAML") Signed-off-by: Geert Uytterhoeven Reviewed-by: Laurent Pinchart Link: https://lore.kernel.org/r/98575791b154d80347d5b78132c1d53f5315ee62.1626257936.git.geert+renesas@glider.be Signed-off-by: Rob Herring --- Documentation/devicetree/bindings/display/renesas,du.yaml | 1 - 1 file changed, 1 deletion(-) diff --git a/Documentation/devicetree/bindings/display/renesas,du.yaml b/Documentation/devicetree/bindings/display/renesas,du.yaml index 5f4345d43020fd..e3ca5389c17d34 100644 --- a/Documentation/devicetree/bindings/display/renesas,du.yaml +++ b/Documentation/devicetree/bindings/display/renesas,du.yaml @@ -92,7 +92,6 @@ required: - reg - clocks - interrupts - - resets - ports allOf: From ea272ce46f3c86d15d9b58bd4d8d44de6cee04b7 Mon Sep 17 00:00:00 2001 From: Veerabadhran Gopalakrishnan Date: Fri, 9 Jul 2021 13:00:11 +0530 Subject: [PATCH 479/714] amdgpu/nv.c - Added video codec support for Yellow Carp Added the supported codecs in the video capabilities query. Signed-off-by: Veerabadhran Gopalakrishnan Reviewed-by: James Zhu Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/nv.c | 20 +++++++++++++++++++- 1 file changed, 19 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/amd/amdgpu/nv.c b/drivers/gpu/drm/amd/amdgpu/nv.c index 94a2c0742ee5e8..04f6cf38c55267 100644 --- a/drivers/gpu/drm/amd/amdgpu/nv.c +++ b/drivers/gpu/drm/amd/amdgpu/nv.c @@ -333,6 +333,19 @@ static const struct amdgpu_video_codecs bg_video_codecs_encode = { .codec_array = NULL, }; +/* Yellow Carp*/ +static const struct amdgpu_video_codec_info yc_video_codecs_decode_array[] = { + {codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_MPEG4_AVC, 4096, 4906, 52)}, + {codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_HEVC, 8192, 4352, 186)}, + {codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_VP9, 8192, 4352, 0)}, + {codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_JPEG, 4096, 4096, 0)}, +}; + +static const struct amdgpu_video_codecs yc_video_codecs_decode = { + .codec_count = ARRAY_SIZE(bg_video_codecs_decode_array), + .codec_array = bg_video_codecs_decode_array, +}; + static int nv_query_video_codecs(struct amdgpu_device *adev, bool encode, const struct amdgpu_video_codecs **codecs) { @@ -353,12 +366,17 @@ static int nv_query_video_codecs(struct amdgpu_device *adev, bool encode, case CHIP_NAVY_FLOUNDER: case CHIP_DIMGREY_CAVEFISH: case CHIP_VANGOGH: - case CHIP_YELLOW_CARP: if (encode) *codecs = &nv_video_codecs_encode; else *codecs = &sc_video_codecs_decode; return 0; + case CHIP_YELLOW_CARP: + if (encode) + *codecs = &nv_video_codecs_encode; + else + *codecs = &yc_video_codecs_decode; + return 0; case CHIP_BEIGE_GOBY: if (encode) *codecs = &bg_video_codecs_encode; From 6505d6fcc616472c1b4d6298beacf52673c7b072 Mon Sep 17 00:00:00 2001 From: Veerabadhran Gopalakrishnan Date: Tue, 13 Jul 2021 23:21:43 +0530 Subject: [PATCH 480/714] amdgpu/nv.c - Optimize code for video codec support structure Optimized the code for codec info structure initialization Signed-off-by: Veerabadhran Gopalakrishnan Reviewed-by: James Zhu Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu.h | 7 + drivers/gpu/drm/amd/amdgpu/nv.c | 223 ++++------------------------ drivers/gpu/drm/amd/amdgpu/soc15.c | 176 +++------------------- 3 files changed, 56 insertions(+), 350 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu.h b/drivers/gpu/drm/amd/amdgpu/amdgpu.h index c0316eaba547c6..8ac6eb9f1fdb8a 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu.h @@ -619,6 +619,13 @@ struct amdgpu_video_codec_info { u32 max_level; }; +#define codec_info_build(type, width, height, level) \ + .codec_type = type,\ + .max_width = width,\ + .max_height = height,\ + .max_pixels_per_frame = height * width,\ + .max_level = level, + struct amdgpu_video_codecs { const u32 codec_count; const struct amdgpu_video_codec_info *codec_array; diff --git a/drivers/gpu/drm/amd/amdgpu/nv.c b/drivers/gpu/drm/amd/amdgpu/nv.c index 04f6cf38c55267..cf73a6923203d4 100644 --- a/drivers/gpu/drm/amd/amdgpu/nv.c +++ b/drivers/gpu/drm/amd/amdgpu/nv.c @@ -64,32 +64,13 @@ #include "smuio_v11_0.h" #include "smuio_v11_0_6.h" -#define codec_info_build(type, width, height, level) \ - .codec_type = type,\ - .max_width = width,\ - .max_height = height,\ - .max_pixels_per_frame = height * width,\ - .max_level = level, - static const struct amd_ip_funcs nv_common_ip_funcs; /* Navi */ static const struct amdgpu_video_codec_info nv_video_codecs_encode_array[] = { - { - .codec_type = AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_MPEG4_AVC, - .max_width = 4096, - .max_height = 2304, - .max_pixels_per_frame = 4096 * 2304, - .max_level = 0, - }, - { - .codec_type = AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_HEVC, - .max_width = 4096, - .max_height = 2304, - .max_pixels_per_frame = 4096 * 2304, - .max_level = 0, - }, + {codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_MPEG4_AVC, 4096, 2304, 0)}, + {codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_HEVC, 4096, 2304, 0)}, }; static const struct amdgpu_video_codecs nv_video_codecs_encode = @@ -101,55 +82,13 @@ static const struct amdgpu_video_codecs nv_video_codecs_encode = /* Navi1x */ static const struct amdgpu_video_codec_info nv_video_codecs_decode_array[] = { - { - .codec_type = AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_MPEG2, - .max_width = 4096, - .max_height = 4096, - .max_pixels_per_frame = 4096 * 4096, - .max_level = 3, - }, - { - .codec_type = AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_MPEG4, - .max_width = 4096, - .max_height = 4096, - .max_pixels_per_frame = 4096 * 4096, - .max_level = 5, - }, - { - .codec_type = AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_MPEG4_AVC, - .max_width = 4096, - .max_height = 4096, - .max_pixels_per_frame = 4096 * 4096, - .max_level = 52, - }, - { - .codec_type = AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_VC1, - .max_width = 4096, - .max_height = 4096, - .max_pixels_per_frame = 4096 * 4096, - .max_level = 4, - }, - { - .codec_type = AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_HEVC, - .max_width = 8192, - .max_height = 4352, - .max_pixels_per_frame = 8192 * 4352, - .max_level = 186, - }, - { - .codec_type = AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_JPEG, - .max_width = 4096, - .max_height = 4096, - .max_pixels_per_frame = 4096 * 4096, - .max_level = 0, - }, - { - .codec_type = AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_VP9, - .max_width = 8192, - .max_height = 4352, - .max_pixels_per_frame = 8192 * 4352, - .max_level = 0, - }, + {codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_MPEG2, 4096, 4906, 3)}, + {codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_MPEG4, 4096, 4906, 5)}, + {codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_MPEG4_AVC, 4096, 4906, 52)}, + {codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_VC1, 4096, 4906, 4)}, + {codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_HEVC, 8192, 4352, 186)}, + {codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_JPEG, 4096, 4096, 0)}, + {codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_VP9, 8192, 4352, 0)}, }; static const struct amdgpu_video_codecs nv_video_codecs_decode = @@ -161,62 +100,14 @@ static const struct amdgpu_video_codecs nv_video_codecs_decode = /* Sienna Cichlid */ static const struct amdgpu_video_codec_info sc_video_codecs_decode_array[] = { - { - .codec_type = AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_MPEG2, - .max_width = 4096, - .max_height = 4096, - .max_pixels_per_frame = 4096 * 4096, - .max_level = 3, - }, - { - .codec_type = AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_MPEG4, - .max_width = 4096, - .max_height = 4096, - .max_pixels_per_frame = 4096 * 4096, - .max_level = 5, - }, - { - .codec_type = AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_MPEG4_AVC, - .max_width = 4096, - .max_height = 4096, - .max_pixels_per_frame = 4096 * 4096, - .max_level = 52, - }, - { - .codec_type = AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_VC1, - .max_width = 4096, - .max_height = 4096, - .max_pixels_per_frame = 4096 * 4096, - .max_level = 4, - }, - { - .codec_type = AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_HEVC, - .max_width = 8192, - .max_height = 4352, - .max_pixels_per_frame = 8192 * 4352, - .max_level = 186, - }, - { - .codec_type = AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_JPEG, - .max_width = 4096, - .max_height = 4096, - .max_pixels_per_frame = 4096 * 4096, - .max_level = 0, - }, - { - .codec_type = AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_VP9, - .max_width = 8192, - .max_height = 4352, - .max_pixels_per_frame = 8192 * 4352, - .max_level = 0, - }, - { - .codec_type = AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_AV1, - .max_width = 8192, - .max_height = 4352, - .max_pixels_per_frame = 8192 * 4352, - .max_level = 0, - }, + {codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_MPEG2, 4096, 4906, 3)}, + {codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_MPEG4, 4096, 4906, 5)}, + {codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_MPEG4_AVC, 4096, 4906, 52)}, + {codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_VC1, 4096, 4906, 4)}, + {codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_HEVC, 8192, 4352, 186)}, + {codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_JPEG, 4096, 4096, 0)}, + {codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_VP9, 8192, 4352, 0)}, + {codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_AV1, 8192, 4352, 0)}, }; static const struct amdgpu_video_codecs sc_video_codecs_decode = @@ -228,80 +119,20 @@ static const struct amdgpu_video_codecs sc_video_codecs_decode = /* SRIOV Sienna Cichlid, not const since data is controlled by host */ static struct amdgpu_video_codec_info sriov_sc_video_codecs_encode_array[] = { - { - .codec_type = AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_MPEG4_AVC, - .max_width = 4096, - .max_height = 2304, - .max_pixels_per_frame = 4096 * 2304, - .max_level = 0, - }, - { - .codec_type = AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_HEVC, - .max_width = 4096, - .max_height = 2304, - .max_pixels_per_frame = 4096 * 2304, - .max_level = 0, - }, + {codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_MPEG4_AVC, 4096, 2304, 0)}, + {codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_HEVC, 4096, 2304, 0)}, }; static struct amdgpu_video_codec_info sriov_sc_video_codecs_decode_array[] = { - { - .codec_type = AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_MPEG2, - .max_width = 4096, - .max_height = 4096, - .max_pixels_per_frame = 4096 * 4096, - .max_level = 3, - }, - { - .codec_type = AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_MPEG4, - .max_width = 4096, - .max_height = 4096, - .max_pixels_per_frame = 4096 * 4096, - .max_level = 5, - }, - { - .codec_type = AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_MPEG4_AVC, - .max_width = 4096, - .max_height = 4096, - .max_pixels_per_frame = 4096 * 4096, - .max_level = 52, - }, - { - .codec_type = AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_VC1, - .max_width = 4096, - .max_height = 4096, - .max_pixels_per_frame = 4096 * 4096, - .max_level = 4, - }, - { - .codec_type = AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_HEVC, - .max_width = 8192, - .max_height = 4352, - .max_pixels_per_frame = 8192 * 4352, - .max_level = 186, - }, - { - .codec_type = AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_JPEG, - .max_width = 4096, - .max_height = 4096, - .max_pixels_per_frame = 4096 * 4096, - .max_level = 0, - }, - { - .codec_type = AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_VP9, - .max_width = 8192, - .max_height = 4352, - .max_pixels_per_frame = 8192 * 4352, - .max_level = 0, - }, - { - .codec_type = AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_AV1, - .max_width = 8192, - .max_height = 4352, - .max_pixels_per_frame = 8192 * 4352, - .max_level = 0, - }, + {codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_MPEG2, 4096, 4906, 3)}, + {codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_MPEG4, 4096, 4906, 5)}, + {codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_MPEG4_AVC, 4096, 4906, 52)}, + {codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_VC1, 4096, 4906, 4)}, + {codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_HEVC, 8192, 4352, 186)}, + {codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_JPEG, 4096, 4096, 0)}, + {codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_VP9, 8192, 4352, 0)}, + {codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_AV1, 8192, 4352, 0)}, }; static struct amdgpu_video_codecs sriov_sc_video_codecs_encode = diff --git a/drivers/gpu/drm/amd/amdgpu/soc15.c b/drivers/gpu/drm/amd/amdgpu/soc15.c index b02436401d46f4..b7d350be805020 100644 --- a/drivers/gpu/drm/amd/amdgpu/soc15.c +++ b/drivers/gpu/drm/amd/amdgpu/soc15.c @@ -88,20 +88,8 @@ /* Vega, Raven, Arcturus */ static const struct amdgpu_video_codec_info vega_video_codecs_encode_array[] = { - { - .codec_type = AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_MPEG4_AVC, - .max_width = 4096, - .max_height = 2304, - .max_pixels_per_frame = 4096 * 2304, - .max_level = 0, - }, - { - .codec_type = AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_HEVC, - .max_width = 4096, - .max_height = 2304, - .max_pixels_per_frame = 4096 * 2304, - .max_level = 0, - }, + {codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_MPEG4_AVC, 4096, 2304, 0)}, + {codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_HEVC, 4096, 2304, 0)}, }; static const struct amdgpu_video_codecs vega_video_codecs_encode = @@ -113,48 +101,12 @@ static const struct amdgpu_video_codecs vega_video_codecs_encode = /* Vega */ static const struct amdgpu_video_codec_info vega_video_codecs_decode_array[] = { - { - .codec_type = AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_MPEG2, - .max_width = 4096, - .max_height = 4096, - .max_pixels_per_frame = 4096 * 4096, - .max_level = 3, - }, - { - .codec_type = AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_MPEG4, - .max_width = 4096, - .max_height = 4096, - .max_pixels_per_frame = 4096 * 4096, - .max_level = 5, - }, - { - .codec_type = AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_MPEG4_AVC, - .max_width = 4096, - .max_height = 4096, - .max_pixels_per_frame = 4096 * 4096, - .max_level = 52, - }, - { - .codec_type = AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_VC1, - .max_width = 4096, - .max_height = 4096, - .max_pixels_per_frame = 4096 * 4096, - .max_level = 4, - }, - { - .codec_type = AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_HEVC, - .max_width = 4096, - .max_height = 4096, - .max_pixels_per_frame = 4096 * 4096, - .max_level = 186, - }, - { - .codec_type = AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_JPEG, - .max_width = 4096, - .max_height = 4096, - .max_pixels_per_frame = 4096 * 4096, - .max_level = 0, - }, + {codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_MPEG2, 4096, 4906, 3)}, + {codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_MPEG4, 4096, 4906, 5)}, + {codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_MPEG4_AVC, 4096, 4906, 52)}, + {codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_VC1, 4096, 4906, 4)}, + {codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_HEVC, 4096, 4096, 186)}, + {codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_JPEG, 4096, 4096, 0)}, }; static const struct amdgpu_video_codecs vega_video_codecs_decode = @@ -166,55 +118,13 @@ static const struct amdgpu_video_codecs vega_video_codecs_decode = /* Raven */ static const struct amdgpu_video_codec_info rv_video_codecs_decode_array[] = { - { - .codec_type = AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_MPEG2, - .max_width = 4096, - .max_height = 4096, - .max_pixels_per_frame = 4096 * 4096, - .max_level = 3, - }, - { - .codec_type = AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_MPEG4, - .max_width = 4096, - .max_height = 4096, - .max_pixels_per_frame = 4096 * 4096, - .max_level = 5, - }, - { - .codec_type = AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_MPEG4_AVC, - .max_width = 4096, - .max_height = 4096, - .max_pixels_per_frame = 4096 * 4096, - .max_level = 52, - }, - { - .codec_type = AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_VC1, - .max_width = 4096, - .max_height = 4096, - .max_pixels_per_frame = 4096 * 4096, - .max_level = 4, - }, - { - .codec_type = AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_HEVC, - .max_width = 4096, - .max_height = 4096, - .max_pixels_per_frame = 4096 * 4096, - .max_level = 186, - }, - { - .codec_type = AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_JPEG, - .max_width = 4096, - .max_height = 4096, - .max_pixels_per_frame = 4096 * 4096, - .max_level = 0, - }, - { - .codec_type = AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_VP9, - .max_width = 4096, - .max_height = 4096, - .max_pixels_per_frame = 4096 * 4096, - .max_level = 0, - }, + {codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_MPEG2, 4096, 4906, 3)}, + {codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_MPEG4, 4096, 4906, 5)}, + {codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_MPEG4_AVC, 4096, 4906, 52)}, + {codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_VC1, 4096, 4906, 4)}, + {codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_HEVC, 4096, 4096, 186)}, + {codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_JPEG, 4096, 4096, 0)}, + {codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_VP9, 4096, 4096, 0)}, }; static const struct amdgpu_video_codecs rv_video_codecs_decode = @@ -226,55 +136,13 @@ static const struct amdgpu_video_codecs rv_video_codecs_decode = /* Renoir, Arcturus */ static const struct amdgpu_video_codec_info rn_video_codecs_decode_array[] = { - { - .codec_type = AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_MPEG2, - .max_width = 4096, - .max_height = 4096, - .max_pixels_per_frame = 4096 * 4096, - .max_level = 3, - }, - { - .codec_type = AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_MPEG4, - .max_width = 4096, - .max_height = 4096, - .max_pixels_per_frame = 4096 * 4096, - .max_level = 5, - }, - { - .codec_type = AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_MPEG4_AVC, - .max_width = 4096, - .max_height = 4096, - .max_pixels_per_frame = 4096 * 4096, - .max_level = 52, - }, - { - .codec_type = AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_VC1, - .max_width = 4096, - .max_height = 4096, - .max_pixels_per_frame = 4096 * 4096, - .max_level = 4, - }, - { - .codec_type = AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_HEVC, - .max_width = 8192, - .max_height = 4352, - .max_pixels_per_frame = 4096 * 4096, - .max_level = 186, - }, - { - .codec_type = AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_JPEG, - .max_width = 4096, - .max_height = 4096, - .max_pixels_per_frame = 4096 * 4096, - .max_level = 0, - }, - { - .codec_type = AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_VP9, - .max_width = 8192, - .max_height = 4352, - .max_pixels_per_frame = 4096 * 4096, - .max_level = 0, - }, + {codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_MPEG2, 4096, 4906, 3)}, + {codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_MPEG4, 4096, 4906, 5)}, + {codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_MPEG4_AVC, 4096, 4906, 52)}, + {codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_VC1, 4096, 4906, 4)}, + {codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_HEVC, 8192, 4352, 186)}, + {codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_JPEG, 4096, 4096, 0)}, + {codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_VP9, 8192, 4352, 0)}, }; static const struct amdgpu_video_codecs rn_video_codecs_decode = From 4fff6fbca12524358a32e56f125ae738141f62b4 Mon Sep 17 00:00:00 2001 From: Xiaojian Du Date: Wed, 14 Jul 2021 15:07:22 +0800 Subject: [PATCH 481/714] drm/amdgpu: update the golden setting for vangogh This patch is to update the golden setting for vangogh. Signed-off-by: Xiaojian Du Reviewed-by: Alex Deucher Signed-off-by: Alex Deucher Cc: stable@vger.kernel.org --- drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c index f5e9c022960bbf..a86a0b347e7394 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c @@ -3379,6 +3379,7 @@ static const struct soc15_reg_golden golden_settings_gc_10_3_vangogh[] = SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_ENHANCE_2, 0xffffffbf, 0x00000020), SOC15_REG_GOLDEN_VALUE(GC, 0, mmSPI_CONFIG_CNTL_1_Vangogh, 0xffffffff, 0x00070103), SOC15_REG_GOLDEN_VALUE(GC, 0, mmSQG_CONFIG, 0x000017ff, 0x00001000), + SOC15_REG_GOLDEN_VALUE(GC, 0, mmSX_DEBUG_1, 0x00010000, 0x00010020), SOC15_REG_GOLDEN_VALUE(GC, 0, mmTA_CNTL_AUX, 0xfff7ffff, 0x01030000), SOC15_REG_GOLDEN_VALUE(GC, 0, mmUTCL1_CTRL, 0xffffffff, 0x00400000), SOC15_REG_GOLDEN_VALUE(GC, 0, mmVGT_GS_MAX_WAVE_ID, 0x00000fff, 0x000000ff), From 3e94b5965e624f7e6d8dd18eb8f3bf2bb99ba30d Mon Sep 17 00:00:00 2001 From: Likun Gao Date: Thu, 15 Jul 2021 11:08:48 +0800 Subject: [PATCH 482/714] drm/amdgpu: update golden setting for sienna_cichlid Update GFX golden setting for sienna_cichlid. Signed-off-by: Likun Gao Reviewed-by: Hawking Zhang Signed-off-by: Alex Deucher Cc: stable@vger.kernel.org --- drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c index a86a0b347e7394..f4771f39a28094 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c @@ -3300,6 +3300,7 @@ static const struct soc15_reg_golden golden_settings_gc_10_3[] = SOC15_REG_GOLDEN_VALUE(GC, 0, mmSQ_PERFCOUNTER7_SELECT, 0xf0f001ff, 0x00000000), SOC15_REG_GOLDEN_VALUE(GC, 0, mmSQ_PERFCOUNTER8_SELECT, 0xf0f001ff, 0x00000000), SOC15_REG_GOLDEN_VALUE(GC, 0, mmSQ_PERFCOUNTER9_SELECT, 0xf0f001ff, 0x00000000), + SOC15_REG_GOLDEN_VALUE(GC, 0, mmSX_DEBUG_1, 0x00010000, 0x00010020), SOC15_REG_GOLDEN_VALUE(GC, 0, mmTA_CNTL_AUX, 0xfff7ffff, 0x01030000), SOC15_REG_GOLDEN_VALUE(GC, 0, mmUTCL1_CTRL, 0xffbfffff, 0x00a00000) }; From cfe4e8f00f8f19ba305800f64962d1949ab5d4ca Mon Sep 17 00:00:00 2001 From: Tao Zhou Date: Thu, 15 Jul 2021 14:49:08 +0800 Subject: [PATCH 483/714] drm/amdgpu: update gc golden setting for dimgrey_cavefish Update gc_10_3_4 golden setting. Signed-off-by: Tao Zhou Reviewed-by: Guchun Chen Signed-off-by: Alex Deucher Cc: stable@vger.kernel.org --- drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c index f4771f39a28094..a64b2c706090ea 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c @@ -3447,6 +3447,7 @@ static const struct soc15_reg_golden golden_settings_gc_10_3_4[] = SOC15_REG_GOLDEN_VALUE(GC, 0, mmSQ_PERFCOUNTER7_SELECT, 0xf0f001ff, 0x00000000), SOC15_REG_GOLDEN_VALUE(GC, 0, mmSQ_PERFCOUNTER8_SELECT, 0xf0f001ff, 0x00000000), SOC15_REG_GOLDEN_VALUE(GC, 0, mmSQ_PERFCOUNTER9_SELECT, 0xf0f001ff, 0x00000000), + SOC15_REG_GOLDEN_VALUE(GC, 0, mmSX_DEBUG_1, 0x00010000, 0x00010020), SOC15_REG_GOLDEN_VALUE(GC, 0, mmTA_CNTL_AUX, 0x01030000, 0x01030000), SOC15_REG_GOLDEN_VALUE(GC, 0, mmUTCL1_CTRL, 0x03a00000, 0x00a00000), SOC15_REG_GOLDEN_VALUE(GC, 0, mmLDS_CONFIG, 0x00000020, 0x00000020) From bd89c991c6c26fb215c63bd21b6d56e7a4ba2ef6 Mon Sep 17 00:00:00 2001 From: Tao Zhou Date: Thu, 15 Jul 2021 14:52:37 +0800 Subject: [PATCH 484/714] drm/amd/pm: update DRIVER_IF_VERSION for beige_goby Update the version to 0xD for beige_goby. Signed-off-by: Tao Zhou Reviewed-by: Jack Gui Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/pm/inc/smu_v11_0.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/amd/pm/inc/smu_v11_0.h b/drivers/gpu/drm/amd/pm/inc/smu_v11_0.h index 1962a587719113..f61b5c914a3d92 100644 --- a/drivers/gpu/drm/amd/pm/inc/smu_v11_0.h +++ b/drivers/gpu/drm/amd/pm/inc/smu_v11_0.h @@ -34,7 +34,7 @@ #define SMU11_DRIVER_IF_VERSION_Navy_Flounder 0xE #define SMU11_DRIVER_IF_VERSION_VANGOGH 0x03 #define SMU11_DRIVER_IF_VERSION_Dimgrey_Cavefish 0xF -#define SMU11_DRIVER_IF_VERSION_Beige_Goby 0x9 +#define SMU11_DRIVER_IF_VERSION_Beige_Goby 0xD /* MP Apertures */ #define MP0_Public 0x03800000 From 353ca0fa56307bfc821a6fb444099e71899f199d Mon Sep 17 00:00:00 2001 From: Liviu Dudau Date: Wed, 14 Jul 2021 09:06:52 +0100 Subject: [PATCH 485/714] drm/amd/display: Fix 10bit 4K display on CIK GPUs Commit 72a7cf0aec0c ("drm/amd/display: Keep linebuffer pixel depth at 30bpp for DCE-11.0.") doesn't seems to have fixed 10bit 4K rendering over DisplayPort for CIK GPUs. On my machine with a HAWAII GPU I get a broken image that looks like it has an effective resolution of 1920x1080 but scaled up in an irregular way. Reverting the commit or applying this patch fixes the problem on v5.14-rc1. Fixes: 72a7cf0aec0c ("drm/amd/display: Keep linebuffer pixel depth at 30bpp for DCE-11.0.") Acked-by: Mario Kleiner Reviewed-by: Harry Wentland Signed-off-by: Liviu Dudau Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/display/dc/core/dc_resource.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/amd/display/dc/core/dc_resource.c b/drivers/gpu/drm/amd/display/dc/core/dc_resource.c index a6a67244a322e9..1596f6b7fed7c1 100644 --- a/drivers/gpu/drm/amd/display/dc/core/dc_resource.c +++ b/drivers/gpu/drm/amd/display/dc/core/dc_resource.c @@ -1062,7 +1062,7 @@ bool resource_build_scaling_params(struct pipe_ctx *pipe_ctx) * so use only 30 bpp on DCE_VERSION_11_0. Testing with DCE 11.2 and 8.3 * did not show such problems, so this seems to be the exception. */ - if (plane_state->ctx->dce_version != DCE_VERSION_11_0) + if (plane_state->ctx->dce_version > DCE_VERSION_11_0) pipe_ctx->plane_res.scl_data.lb_params.depth = LB_PIXEL_DEPTH_36BPP; else pipe_ctx->plane_res.scl_data.lb_params.depth = LB_PIXEL_DEPTH_30BPP; From 2cc3aeb5ecccec0d266813172fcd82b4b5fa5803 Mon Sep 17 00:00:00 2001 From: Ilias Apalodimas Date: Fri, 16 Jul 2021 10:02:18 +0300 Subject: [PATCH 486/714] skbuff: Fix a potential race while recycling page_pool packets As Alexander points out, when we are trying to recycle a cloned/expanded SKB we might trigger a race. The recycling code relies on the pp_recycle bit to trigger, which we carry over to cloned SKBs. If that cloned SKB gets expanded or if we get references to the frags, call skb_release_data() and overwrite skb->head, we are creating separate instances accessing the same page frags. Since the skb_release_data() will first try to recycle the frags, there's a potential race between the original and cloned SKB, since both will have the pp_recycle bit set. Fix this by explicitly those SKBs not recyclable. The atomic_sub_return effectively limits us to a single release case, and when we are calling skb_release_data we are also releasing the option to perform the recycling, or releasing the pages from the page pool. Fixes: 6a5bcd84e886 ("page_pool: Allow drivers to hint on SKB recycling") Reported-by: Alexander Duyck Suggested-by: Alexander Duyck Reviewed-by: Alexander Duyck Acked-by: Jesper Dangaard Brouer Signed-off-by: Ilias Apalodimas Signed-off-by: David S. Miller --- net/core/skbuff.c | 13 ++++++++++++- 1 file changed, 12 insertions(+), 1 deletion(-) diff --git a/net/core/skbuff.c b/net/core/skbuff.c index f63de967ac2593..0fe97d6607902a 100644 --- a/net/core/skbuff.c +++ b/net/core/skbuff.c @@ -663,7 +663,7 @@ static void skb_release_data(struct sk_buff *skb) if (skb->cloned && atomic_sub_return(skb->nohdr ? (1 << SKB_DATAREF_SHIFT) + 1 : 1, &shinfo->dataref)) - return; + goto exit; skb_zcopy_clear(skb, true); @@ -674,6 +674,17 @@ static void skb_release_data(struct sk_buff *skb) kfree_skb_list(shinfo->frag_list); skb_free_head(skb); +exit: + /* When we clone an SKB we copy the reycling bit. The pp_recycle + * bit is only set on the head though, so in order to avoid races + * while trying to recycle fragments on __skb_frag_unref() we need + * to make one SKB responsible for triggering the recycle path. + * So disable the recycling bit if an SKB is cloned and we have + * additional references to to the fragmented part of the SKB. + * Eventually the last SKB will have the recycling bit set and it's + * dataref set to 0, which will trigger the recycling + */ + skb->pp_recycle = 0; } /* From 11d8d98cbeef1496469b268d79938b05524731e8 Mon Sep 17 00:00:00 2001 From: Eric Woudstra Date: Fri, 16 Jul 2021 17:36:39 +0200 Subject: [PATCH 487/714] mt7530 fix mt7530_fdb_write vid missing ivl bit According to reference guides mt7530 (mt7620) and mt7531: NOTE: When IVL is reset, MAC[47:0] and FID[2:0] will be used to read/write the address table. When IVL is set, MAC[47:0] and CVID[11:0] will be used to read/write the address table. Since the function only fills in CVID and no FID, we need to set the IVL bit. The existing code does not set it. This is a fix for the issue I dropped here earlier: http://lists.infradead.org/pipermail/linux-mediatek/2021-June/025697.html With this patch, it is now possible to delete the 'self' fdb entry manually. However, wifi roaming still has the same issue, the entry does not get deleted automatically. Wifi roaming also needs a fix somewhere else to function correctly in combination with vlan. Signed-off-by: Eric Woudstra Signed-off-by: David S. Miller --- drivers/net/dsa/mt7530.c | 1 + drivers/net/dsa/mt7530.h | 1 + 2 files changed, 2 insertions(+) diff --git a/drivers/net/dsa/mt7530.c b/drivers/net/dsa/mt7530.c index 93136f7e69f51f..9e4df35f92ccdb 100644 --- a/drivers/net/dsa/mt7530.c +++ b/drivers/net/dsa/mt7530.c @@ -366,6 +366,7 @@ mt7530_fdb_write(struct mt7530_priv *priv, u16 vid, int i; reg[1] |= vid & CVID_MASK; + reg[1] |= ATA2_IVL; reg[2] |= (aging & AGE_TIMER_MASK) << AGE_TIMER; reg[2] |= (port_mask & PORT_MAP_MASK) << PORT_MAP; /* STATIC_ENT indicate that entry is static wouldn't diff --git a/drivers/net/dsa/mt7530.h b/drivers/net/dsa/mt7530.h index 334d610a503d9c..b19b389ff10ac6 100644 --- a/drivers/net/dsa/mt7530.h +++ b/drivers/net/dsa/mt7530.h @@ -79,6 +79,7 @@ enum mt753x_bpdu_port_fw { #define STATIC_EMP 0 #define STATIC_ENT 3 #define MT7530_ATA2 0x78 +#define ATA2_IVL BIT(15) /* Register for address table write data */ #define MT7530_ATWD 0x7c From 5f291bfd33c8995c69f5a50f21445a4a93584ed2 Mon Sep 17 00:00:00 2001 From: Geert Uytterhoeven Date: Wed, 14 Jul 2021 11:33:43 +0200 Subject: [PATCH 488/714] arm: Typo s/PCI_IXP4XX_LEGACY/IXP4XX_PCI_LEGACY/ Kconfig symbol PCI_IXP4XX_LEGACY does not exist, but IXP4XX_PCI_LEGACY does. Fixes: d5d9f7ac58ea1041 ("ARM/ixp4xx: Make NEED_MACH_IO_H optional") Signed-off-by: Geert Uytterhoeven Reviewed-by: Linus Walleij Link: https://lore.kernel.org/r/82ce37c617293521f095a945a255456b9512769c.1626255077.git.geert+renesas@glider.be' Signed-off-by: Arnd Bergmann --- arch/arm/Kconfig | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/arm/Kconfig b/arch/arm/Kconfig index 3ea1c417339ff6..82f908fa5676ac 100644 --- a/arch/arm/Kconfig +++ b/arch/arm/Kconfig @@ -395,7 +395,7 @@ config ARCH_IXP4XX select IXP4XX_IRQ select IXP4XX_TIMER # With the new PCI driver this is not needed - select NEED_MACH_IO_H if PCI_IXP4XX_LEGACY + select NEED_MACH_IO_H if IXP4XX_PCI_LEGACY select USB_EHCI_BIG_ENDIAN_DESC select USB_EHCI_BIG_ENDIAN_MMIO help From 2096d6feec8359203de406c424242dcb977fe1d1 Mon Sep 17 00:00:00 2001 From: Linus Walleij Date: Wed, 14 Jul 2021 14:27:03 +0200 Subject: [PATCH 489/714] ARM: configs: Update Integrator defconfig This updates the Integrator defconfig for the changes in the v5.14-rc1 kernel: - The Framebuffer CONFIG_FB needs to be explicitly selected or we don't get any framebuffer anymore. DRM has stopped to select FB because of circular dependency. - Drop the unused Matrox FB drivers that are only used with specific PCI cards. Fixes: f611b1e7624c ("drm: Avoid circular dependencies for CONFIG_FB") Signed-off-by: Linus Walleij Reviewed-by: Kees Cook Cc: Kees Cook Link: https://lore.kernel.org/r/20210714122703.212609-1-linus.walleij@linaro.org' Signed-off-by: Arnd Bergmann --- arch/arm/configs/integrator_defconfig | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) diff --git a/arch/arm/configs/integrator_defconfig b/arch/arm/configs/integrator_defconfig index b06e537d514903..4dfe321a79f6dd 100644 --- a/arch/arm/configs/integrator_defconfig +++ b/arch/arm/configs/integrator_defconfig @@ -57,10 +57,7 @@ CONFIG_DRM=y CONFIG_DRM_DISPLAY_CONNECTOR=y CONFIG_DRM_SIMPLE_BRIDGE=y CONFIG_DRM_PL111=y -CONFIG_FB_MODE_HELPERS=y -CONFIG_FB_MATROX=y -CONFIG_FB_MATROX_MILLENIUM=y -CONFIG_FB_MATROX_MYSTIQUE=y +CONFIG_FB=y CONFIG_BACKLIGHT_CLASS_DEVICE=y # CONFIG_VGA_CONSOLE is not set CONFIG_LOGO=y From 56fa6e8a184489b47525488472e9bdcdcb59cd6f Mon Sep 17 00:00:00 2001 From: Linus Walleij Date: Wed, 14 Jul 2021 11:00:40 +0200 Subject: [PATCH 490/714] ARM: configs: Update RealView defconfig This updates the RealView defconfig for the changes in the v5.14-rc1 kernel: - The Framebuffer CONFIG_FB needs to be explicitly selected or we don't get any framebuffer anymore. DRM has stopped to select FB because of circular dependency. - The CONFIG_FB_MODE_HELPERS are not needed when using DRM framebuffer emulation as DRM does. - Drop two unused penguin logos. Fixes: f611b1e7624c ("drm: Avoid circular dependencies for CONFIG_FB") Signed-off-by: Linus Walleij Reviewed-by: Kees Cook Cc: Kees Cook Link: https://lore.kernel.org/r/20210714090040.182381-1-linus.walleij@linaro.org' Signed-off-by: Arnd Bergmann --- arch/arm/configs/realview_defconfig | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/arch/arm/configs/realview_defconfig b/arch/arm/configs/realview_defconfig index 483c400dd39174..4c01e313099f8e 100644 --- a/arch/arm/configs/realview_defconfig +++ b/arch/arm/configs/realview_defconfig @@ -64,11 +64,9 @@ CONFIG_DRM_PANEL_SIMPLE=y CONFIG_DRM_DISPLAY_CONNECTOR=y CONFIG_DRM_SIMPLE_BRIDGE=y CONFIG_DRM_PL111=y -CONFIG_FB_MODE_HELPERS=y +CONFIG_FB=y CONFIG_BACKLIGHT_CLASS_DEVICE=y CONFIG_LOGO=y -# CONFIG_LOGO_LINUX_MONO is not set -# CONFIG_LOGO_LINUX_VGA16 is not set CONFIG_SOUND=y CONFIG_SND=y # CONFIG_SND_DRIVERS is not set From 850d8ec92735b3d58b81363c4ae29932a2ebbabb Mon Sep 17 00:00:00 2001 From: Linus Walleij Date: Wed, 14 Jul 2021 10:18:19 +0200 Subject: [PATCH 491/714] ARM: configs: Update Versatile defconfig This updates the Versatile defconfig for the changes in the v5.14-rc1 kernel: - The Framebuffer CONFIG_FB needs to be explicitly selected or we don't get any framebuffer anymore. DRM has stopped to select FB because of circular dependency. - The CONFIG_FB_MODE_HELPERS are not needed when using DRM framebuffer emulation as DRM does. - The Acorn fonts are removed, the default framebuffer font works fine. I don't know why this was selected in the first place or how the Kconfig was altered so it was removed. Fixes: f611b1e7624c ("drm: Avoid circular dependencies for CONFIG_FB") Signed-off-by: Linus Walleij Reviewed-by: Kees Cook Cc: Kees Cook Link: https://lore.kernel.org/r/20210714081819.139210-1-linus.walleij@linaro.org' Signed-off-by: Arnd Bergmann --- arch/arm/configs/versatile_defconfig | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/arch/arm/configs/versatile_defconfig b/arch/arm/configs/versatile_defconfig index e7ecfb365e9121..b703f4757021e1 100644 --- a/arch/arm/configs/versatile_defconfig +++ b/arch/arm/configs/versatile_defconfig @@ -60,7 +60,7 @@ CONFIG_DRM_PANEL_SIMPLE=y CONFIG_DRM_DISPLAY_CONNECTOR=y CONFIG_DRM_SIMPLE_BRIDGE=y CONFIG_DRM_PL111=y -CONFIG_FB_MODE_HELPERS=y +CONFIG_FB=y CONFIG_BACKLIGHT_CLASS_DEVICE=y CONFIG_LOGO=y CONFIG_SOUND=y @@ -88,8 +88,6 @@ CONFIG_NFSD=y CONFIG_NFSD_V3=y CONFIG_NLS_CODEPAGE_850=m CONFIG_NLS_ISO8859_1=m -CONFIG_FONTS=y -CONFIG_FONT_ACORN_8x8=y CONFIG_MAGIC_SYSRQ=y CONFIG_DEBUG_FS=y CONFIG_DEBUG_KERNEL=y From 49e7757a73d181b35851cb01b5d285888014f8b2 Mon Sep 17 00:00:00 2001 From: Linus Walleij Date: Tue, 13 Jul 2021 15:37:08 +0200 Subject: [PATCH 492/714] ARM: configs: Update Vexpress defconfig This updates the Versatile Express defconfig for the changes in the v5.14-rc1 kernel: - The Framebuffer CONFIG_FB needs to be explicitly selected or we don't get any framebuffer anymore. DRM has stopped to select FB because of circular dependency. - CONFIG_CMA options were moved around. - CONFIG_MODULES options were moved around. - CONFIG_CRYPTO_HW was moved around. Fixes: f611b1e7624c ("drm: Avoid circular dependencies for CONFIG_FB") Signed-off-by: Linus Walleij Acked-by: Sudeep Holla Cc: Kees Cook Cc: Sudeep Holla Link: https://lore.kernel.org/r/20210713133708.94397-1-linus.walleij@linaro.org' Signed-off-by: Arnd Bergmann --- arch/arm/configs/vexpress_defconfig | 17 +++++++---------- 1 file changed, 7 insertions(+), 10 deletions(-) diff --git a/arch/arm/configs/vexpress_defconfig b/arch/arm/configs/vexpress_defconfig index 4479369540f285..b5e246dd23f4c6 100644 --- a/arch/arm/configs/vexpress_defconfig +++ b/arch/arm/configs/vexpress_defconfig @@ -11,9 +11,6 @@ CONFIG_CPUSETS=y # CONFIG_NET_NS is not set CONFIG_BLK_DEV_INITRD=y CONFIG_PROFILING=y -CONFIG_MODULES=y -CONFIG_MODULE_UNLOAD=y -# CONFIG_BLK_DEV_BSG is not set CONFIG_ARCH_VEXPRESS=y CONFIG_ARCH_VEXPRESS_DCSCB=y CONFIG_ARCH_VEXPRESS_TC2_PM=y @@ -23,14 +20,17 @@ CONFIG_MCPM=y CONFIG_VMSPLIT_2G=y CONFIG_NR_CPUS=8 CONFIG_ARM_PSCI=y -CONFIG_CMA=y CONFIG_ZBOOT_ROM_TEXT=0x0 CONFIG_ZBOOT_ROM_BSS=0x0 CONFIG_CMDLINE="console=ttyAMA0" CONFIG_CPU_IDLE=y CONFIG_VFP=y CONFIG_NEON=y +CONFIG_MODULES=y +CONFIG_MODULE_UNLOAD=y +# CONFIG_BLK_DEV_BSG is not set # CONFIG_CORE_DUMP_DEFAULT_ELF_HEADERS is not set +CONFIG_CMA=y CONFIG_NET=y CONFIG_PACKET=y CONFIG_UNIX=y @@ -43,7 +43,6 @@ CONFIG_IP_PNP_BOOTP=y CONFIG_NET_9P=y CONFIG_NET_9P_VIRTIO=y CONFIG_DEVTMPFS=y -CONFIG_DMA_CMA=y CONFIG_MTD=y CONFIG_MTD_CMDLINE_PARTS=y CONFIG_MTD_BLOCK=y @@ -59,7 +58,6 @@ CONFIG_VIRTIO_BLK=y CONFIG_BLK_DEV_SD=y CONFIG_SCSI_VIRTIO=y CONFIG_ATA=y -# CONFIG_SATA_PMP is not set CONFIG_NETDEVICES=y CONFIG_VIRTIO_NET=y CONFIG_SMC91X=y @@ -81,11 +79,9 @@ CONFIG_DRM=y CONFIG_DRM_PANEL_SIMPLE=y CONFIG_DRM_SII902X=y CONFIG_DRM_PL111=y -CONFIG_FB_MODE_HELPERS=y +CONFIG_FB=y CONFIG_BACKLIGHT_CLASS_DEVICE=y CONFIG_LOGO=y -# CONFIG_LOGO_LINUX_MONO is not set -# CONFIG_LOGO_LINUX_VGA16 is not set CONFIG_SOUND=y CONFIG_SND=y # CONFIG_SND_DRIVERS is not set @@ -136,10 +132,11 @@ CONFIG_ROOT_NFS=y CONFIG_9P_FS=y CONFIG_NLS_CODEPAGE_437=y CONFIG_NLS_ISO8859_1=y +# CONFIG_CRYPTO_HW is not set +CONFIG_DMA_CMA=y CONFIG_DEBUG_INFO=y CONFIG_MAGIC_SYSRQ=y CONFIG_DEBUG_KERNEL=y CONFIG_DETECT_HUNG_TASK=y # CONFIG_SCHED_DEBUG is not set CONFIG_DEBUG_USER=y -# CONFIG_CRYPTO_HW is not set From 042f2e107a2ea34605b3793a88b11761afc8e8e0 Mon Sep 17 00:00:00 2001 From: Linus Walleij Date: Mon, 12 Jul 2021 10:55:22 +0200 Subject: [PATCH 493/714] ARM: configs: Update u8500_defconfig The platform lost the framebuffer due to a commit solving a circular dependency in v5.14-rc1, so add it back in by explicitly selecting the framebuffer. The U8500 has also gained a few systems using touchscreens from Cypress, Melfas and Zinitix so add these at the same time as we're updating the defconfig anyway. Fixes: f611b1e7624c ("drm: Avoid circular dependencies for CONFIG_FB") Signed-off-by: Linus Walleij Cc: phone-devel@vger.kernel.org Cc: Kees Cook Cc: Arnd Bergmann Cc: Stephan Gerhold Cc: newbyte@disroot.org Link: https://lore.kernel.org/r/20210712085522.672482-1-linus.walleij@linaro.org' Signed-off-by: Arnd Bergmann --- arch/arm/configs/u8500_defconfig | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/arch/arm/configs/u8500_defconfig b/arch/arm/configs/u8500_defconfig index dbb1ef60176214..3b30913d7d8d33 100644 --- a/arch/arm/configs/u8500_defconfig +++ b/arch/arm/configs/u8500_defconfig @@ -61,6 +61,10 @@ CONFIG_INPUT_TOUCHSCREEN=y CONFIG_TOUCHSCREEN_ATMEL_MXT=y CONFIG_TOUCHSCREEN_BU21013=y CONFIG_TOUCHSCREEN_CY8CTMA140=y +CONFIG_TOUCHSCREEN_CYTTSP_CORE=y +CONFIG_TOUCHSCREEN_CYTTSP_SPI=y +CONFIG_TOUCHSCREEN_MMS114=y +CONFIG_TOUCHSCREEN_ZINITIX=y CONFIG_INPUT_MISC=y CONFIG_INPUT_AB8500_PONKEY=y CONFIG_INPUT_GPIO_VIBRA=y @@ -100,6 +104,7 @@ CONFIG_DRM_PANEL_SAMSUNG_S6E63M0_DSI=y CONFIG_DRM_PANEL_SONY_ACX424AKP=y CONFIG_DRM_LIMA=y CONFIG_DRM_MCDE=y +CONFIG_FB=y CONFIG_BACKLIGHT_CLASS_DEVICE=y CONFIG_BACKLIGHT_KTD253=y CONFIG_BACKLIGHT_GPIO=y From ab37a7a890c1176144a4c66ff3d51ef2c20ed486 Mon Sep 17 00:00:00 2001 From: Stefan Wahren Date: Sat, 10 Jul 2021 13:04:55 +0200 Subject: [PATCH 494/714] ARM: multi_v7_defconfig: Make NOP_USB_XCEIV driver built-in The usage of usb-nop-xceiv PHY on Raspberry Pi boards with BCM283x has been a "regression source" a lot of times. The last case is breakage of USB mass storage boot has been commit e590474768f1 ("driver core: Set fw_devlink=on by default") for multi_v7_defconfig. As long as NOP_USB_XCEIV is configured as module, the dwc2 USB driver defer probing endlessly and prevent booting from USB mass storage device. So make the driver built-in as in bcm2835_defconfig and arm64/defconfig. Fixes: e590474768f1 ("driver core: Set fw_devlink=on by default") Reported-by: Ojaswin Mujoo Signed-off-by: Stefan Wahren Link: https://lore.kernel.org/r/1625915095-23077-1-git-send-email-stefan.wahren@i2se.com' Signed-off-by: Arnd Bergmann --- arch/arm/configs/multi_v7_defconfig | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/arm/configs/multi_v7_defconfig b/arch/arm/configs/multi_v7_defconfig index 52a0400fdd926e..d9abaae118dd19 100644 --- a/arch/arm/configs/multi_v7_defconfig +++ b/arch/arm/configs/multi_v7_defconfig @@ -821,7 +821,7 @@ CONFIG_USB_ISP1760=y CONFIG_USB_HSIC_USB3503=y CONFIG_AB8500_USB=y CONFIG_KEYSTONE_USB_PHY=m -CONFIG_NOP_USB_XCEIV=m +CONFIG_NOP_USB_XCEIV=y CONFIG_AM335X_PHY_USB=m CONFIG_TWL6030_USB=m CONFIG_USB_GPIO_VBUS=y From 82a1c67554dff610d6be4e1982c425717b3c6a23 Mon Sep 17 00:00:00 2001 From: Sudeep Holla Date: Thu, 1 Jul 2021 14:21:18 +0100 Subject: [PATCH 495/714] ARM: dts: versatile: Fix up interrupt controller node names Once the new schema interrupt-controller/arm,vic.yaml is added, we get the below warnings: arch/arm/boot/dts/versatile-ab.dt.yaml: intc@10140000: $nodename:0: 'intc@10140000' does not match '^interrupt-controller(@[0-9a-f,]+)*$' arch/arm/boot/dts/versatile-ab.dt.yaml: intc@10140000: 'clear-mask' does not match any of the regexes Fix the node names for the interrupt controller to conform to the standard node name interrupt-controller@.. Also drop invalid clear-mask property. Signed-off-by: Sudeep Holla Acked-by: Linus Walleij Link: https://lore.kernel.org/r/20210701132118.759454-1-sudeep.holla@arm.com' Signed-off-by: Arnd Bergmann --- arch/arm/boot/dts/versatile-ab.dts | 5 ++--- arch/arm/boot/dts/versatile-pb.dts | 2 +- 2 files changed, 3 insertions(+), 4 deletions(-) diff --git a/arch/arm/boot/dts/versatile-ab.dts b/arch/arm/boot/dts/versatile-ab.dts index 37bd41ff8dffa9..151c0220047dd2 100644 --- a/arch/arm/boot/dts/versatile-ab.dts +++ b/arch/arm/boot/dts/versatile-ab.dts @@ -195,16 +195,15 @@ #size-cells = <1>; ranges; - vic: intc@10140000 { + vic: interrupt-controller@10140000 { compatible = "arm,versatile-vic"; interrupt-controller; #interrupt-cells = <1>; reg = <0x10140000 0x1000>; - clear-mask = <0xffffffff>; valid-mask = <0xffffffff>; }; - sic: intc@10003000 { + sic: interrupt-controller@10003000 { compatible = "arm,versatile-sic"; interrupt-controller; #interrupt-cells = <1>; diff --git a/arch/arm/boot/dts/versatile-pb.dts b/arch/arm/boot/dts/versatile-pb.dts index 06a0fdf24026ca..e7e751a858d811 100644 --- a/arch/arm/boot/dts/versatile-pb.dts +++ b/arch/arm/boot/dts/versatile-pb.dts @@ -7,7 +7,7 @@ amba { /* The Versatile PB is using more SIC IRQ lines than the AB */ - sic: intc@10003000 { + sic: interrupt-controller@10003000 { clear-mask = <0xffffffff>; /* * Valid interrupt lines mask according to From 5f119ba1d5771bbf46d57cff7417dcd84d3084ba Mon Sep 17 00:00:00 2001 From: Yajun Deng Date: Wed, 14 Jul 2021 17:13:20 +0800 Subject: [PATCH 496/714] net: decnet: Fix sleeping inside in af_decnet The release_sock() is blocking function, it would change the state after sleeping. use wait_woken() instead. Fixes: 1da177e4c3f4 ("Linux-2.6.12-rc2") Signed-off-by: Yajun Deng Signed-off-by: David S. Miller --- net/decnet/af_decnet.c | 27 ++++++++++++--------------- 1 file changed, 12 insertions(+), 15 deletions(-) diff --git a/net/decnet/af_decnet.c b/net/decnet/af_decnet.c index 5dbd45dc35ad3f..dc92a67baea394 100644 --- a/net/decnet/af_decnet.c +++ b/net/decnet/af_decnet.c @@ -816,7 +816,7 @@ static int dn_auto_bind(struct socket *sock) static int dn_confirm_accept(struct sock *sk, long *timeo, gfp_t allocation) { struct dn_scp *scp = DN_SK(sk); - DEFINE_WAIT(wait); + DEFINE_WAIT_FUNC(wait, woken_wake_function); int err; if (scp->state != DN_CR) @@ -826,11 +826,11 @@ static int dn_confirm_accept(struct sock *sk, long *timeo, gfp_t allocation) scp->segsize_loc = dst_metric_advmss(__sk_dst_get(sk)); dn_send_conn_conf(sk, allocation); - prepare_to_wait(sk_sleep(sk), &wait, TASK_INTERRUPTIBLE); + add_wait_queue(sk_sleep(sk), &wait); for(;;) { release_sock(sk); if (scp->state == DN_CC) - *timeo = schedule_timeout(*timeo); + *timeo = wait_woken(&wait, TASK_INTERRUPTIBLE, *timeo); lock_sock(sk); err = 0; if (scp->state == DN_RUN) @@ -844,9 +844,8 @@ static int dn_confirm_accept(struct sock *sk, long *timeo, gfp_t allocation) err = -EAGAIN; if (!*timeo) break; - prepare_to_wait(sk_sleep(sk), &wait, TASK_INTERRUPTIBLE); } - finish_wait(sk_sleep(sk), &wait); + remove_wait_queue(sk_sleep(sk), &wait); if (err == 0) { sk->sk_socket->state = SS_CONNECTED; } else if (scp->state != DN_CC) { @@ -858,7 +857,7 @@ static int dn_confirm_accept(struct sock *sk, long *timeo, gfp_t allocation) static int dn_wait_run(struct sock *sk, long *timeo) { struct dn_scp *scp = DN_SK(sk); - DEFINE_WAIT(wait); + DEFINE_WAIT_FUNC(wait, woken_wake_function); int err = 0; if (scp->state == DN_RUN) @@ -867,11 +866,11 @@ static int dn_wait_run(struct sock *sk, long *timeo) if (!*timeo) return -EALREADY; - prepare_to_wait(sk_sleep(sk), &wait, TASK_INTERRUPTIBLE); + add_wait_queue(sk_sleep(sk), &wait); for(;;) { release_sock(sk); if (scp->state == DN_CI || scp->state == DN_CC) - *timeo = schedule_timeout(*timeo); + *timeo = wait_woken(&wait, TASK_INTERRUPTIBLE, *timeo); lock_sock(sk); err = 0; if (scp->state == DN_RUN) @@ -885,9 +884,8 @@ static int dn_wait_run(struct sock *sk, long *timeo) err = -ETIMEDOUT; if (!*timeo) break; - prepare_to_wait(sk_sleep(sk), &wait, TASK_INTERRUPTIBLE); } - finish_wait(sk_sleep(sk), &wait); + remove_wait_queue(sk_sleep(sk), &wait); out: if (err == 0) { sk->sk_socket->state = SS_CONNECTED; @@ -1032,16 +1030,16 @@ static void dn_user_copy(struct sk_buff *skb, struct optdata_dn *opt) static struct sk_buff *dn_wait_for_connect(struct sock *sk, long *timeo) { - DEFINE_WAIT(wait); + DEFINE_WAIT_FUNC(wait, woken_wake_function); struct sk_buff *skb = NULL; int err = 0; - prepare_to_wait(sk_sleep(sk), &wait, TASK_INTERRUPTIBLE); + add_wait_queue(sk_sleep(sk), &wait); for(;;) { release_sock(sk); skb = skb_dequeue(&sk->sk_receive_queue); if (skb == NULL) { - *timeo = schedule_timeout(*timeo); + *timeo = wait_woken(&wait, TASK_INTERRUPTIBLE, *timeo); skb = skb_dequeue(&sk->sk_receive_queue); } lock_sock(sk); @@ -1056,9 +1054,8 @@ static struct sk_buff *dn_wait_for_connect(struct sock *sk, long *timeo) err = -EAGAIN; if (!*timeo) break; - prepare_to_wait(sk_sleep(sk), &wait, TASK_INTERRUPTIBLE); } - finish_wait(sk_sleep(sk), &wait); + remove_wait_queue(sk_sleep(sk), &wait); return skb == NULL ? ERR_PTR(err) : skb; } From 5b69874f74cc5707edd95fcdaa757c507ac8af0f Mon Sep 17 00:00:00 2001 From: Mahesh Bandewar Date: Fri, 16 Jul 2021 16:09:41 -0700 Subject: [PATCH 497/714] bonding: fix build issue The commit 9a5605505d9c (" bonding: Add struct bond_ipesc to manage SA") is causing following build error when XFRM is not selected in kernel config. lld: error: undefined symbol: xfrm_dev_state_flush >>> referenced by bond_main.c:3453 (drivers/net/bonding/bond_main.c:3453) >>> net/bonding/bond_main.o:(bond_netdev_event) in archive drivers/built-in.a Fixes: 9a5605505d9c (" bonding: Add struct bond_ipesc to manage SA") Signed-off-by: Mahesh Bandewar CC: Taehee Yoo CC: Jay Vosburgh Signed-off-by: David S. Miller --- drivers/net/bonding/bond_main.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/net/bonding/bond_main.c b/drivers/net/bonding/bond_main.c index d22d783033112f..31730efa753823 100644 --- a/drivers/net/bonding/bond_main.c +++ b/drivers/net/bonding/bond_main.c @@ -3450,7 +3450,9 @@ static int bond_master_netdev_event(unsigned long event, return bond_event_changename(event_bond); case NETDEV_UNREGISTER: bond_remove_proc_entry(event_bond); +#ifdef CONFIG_XFRM_OFFLOAD xfrm_dev_state_flush(dev_net(bond_dev), bond_dev, true); +#endif /* CONFIG_XFRM_OFFLOAD */ break; case NETDEV_REGISTER: bond_create_proc_entry(event_bond); From bd31ecf44b8e18ccb1e5f6b50f85de6922a60de3 Mon Sep 17 00:00:00 2001 From: Nicholas Piggin Date: Fri, 16 Jul 2021 12:43:09 +1000 Subject: [PATCH 498/714] KVM: PPC: Book3S: Fix CONFIG_TRANSACTIONAL_MEM=n crash When running CPU_FTR_P9_TM_HV_ASSIST, HFSCR[TM] is set for the guest even if the host has CONFIG_TRANSACTIONAL_MEM=n, which causes it to be unprepared to handle guest exits while transactional. Normal guests don't have a problem because the HTM capability will not be advertised, but a rogue or buggy one could crash the host. Fixes: 4bb3c7a0208f ("KVM: PPC: Book3S HV: Work around transactional memory bugs in POWER9") Reported-by: Alexey Kardashevskiy Signed-off-by: Nicholas Piggin Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20210716024310.164448-1-npiggin@gmail.com --- arch/powerpc/kvm/book3s_hv.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/arch/powerpc/kvm/book3s_hv.c b/arch/powerpc/kvm/book3s_hv.c index 1d1fcc290fca4a..085fb8ecbf6884 100644 --- a/arch/powerpc/kvm/book3s_hv.c +++ b/arch/powerpc/kvm/book3s_hv.c @@ -2697,8 +2697,10 @@ static int kvmppc_core_vcpu_create_hv(struct kvm_vcpu *vcpu) HFSCR_DSCR | HFSCR_VECVSX | HFSCR_FP | HFSCR_PREFIX; if (cpu_has_feature(CPU_FTR_HVMODE)) { vcpu->arch.hfscr &= mfspr(SPRN_HFSCR); +#ifdef CONFIG_PPC_TRANSACTIONAL_MEM if (cpu_has_feature(CPU_FTR_P9_TM_HV_ASSIST)) vcpu->arch.hfscr |= HFSCR_TM; +#endif } if (cpu_has_feature(CPU_FTR_TM_COMP)) vcpu->arch.hfscr |= HFSCR_TM; From bc4188a2f56e821ea057aca6bf444e138d06c252 Mon Sep 17 00:00:00 2001 From: Nicholas Piggin Date: Fri, 16 Jul 2021 12:43:10 +1000 Subject: [PATCH 499/714] KVM: PPC: Fix kvm_arch_vcpu_ioctl vcpu_load leak vcpu_put is not called if the user copy fails. This can result in preempt notifier corruption and crashes, among other issues. Fixes: b3cebfe8c1ca ("KVM: PPC: Move vcpu_load/vcpu_put down to each ioctl case in kvm_arch_vcpu_ioctl") Reported-by: Alexey Kardashevskiy Signed-off-by: Nicholas Piggin Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20210716024310.164448-2-npiggin@gmail.com --- arch/powerpc/kvm/powerpc.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/arch/powerpc/kvm/powerpc.c b/arch/powerpc/kvm/powerpc.c index be33b5321a7663..b4e6f70b97b940 100644 --- a/arch/powerpc/kvm/powerpc.c +++ b/arch/powerpc/kvm/powerpc.c @@ -2048,9 +2048,9 @@ long kvm_arch_vcpu_ioctl(struct file *filp, { struct kvm_enable_cap cap; r = -EFAULT; - vcpu_load(vcpu); if (copy_from_user(&cap, argp, sizeof(cap))) goto out; + vcpu_load(vcpu); r = kvm_vcpu_ioctl_enable_cap(vcpu, &cap); vcpu_put(vcpu); break; @@ -2074,9 +2074,9 @@ long kvm_arch_vcpu_ioctl(struct file *filp, case KVM_DIRTY_TLB: { struct kvm_dirty_tlb dirty; r = -EFAULT; - vcpu_load(vcpu); if (copy_from_user(&dirty, argp, sizeof(dirty))) goto out; + vcpu_load(vcpu); r = kvm_vcpu_ioctl_dirty_tlb(vcpu, &dirty); vcpu_put(vcpu); break; From 1c2b9519159b470ef24b2638f4794e86e2952ab7 Mon Sep 17 00:00:00 2001 From: Takashi Iwai Date: Fri, 16 Jul 2021 15:27:23 +0200 Subject: [PATCH 500/714] ALSA: sb: Fix potential ABBA deadlock in CSP driver SB16 CSP driver may hit potentially a typical ABBA deadlock in two code paths: In snd_sb_csp_stop(): spin_lock_irqsave(&p->chip->mixer_lock, flags); spin_lock(&p->chip->reg_lock); In snd_sb_csp_load(): spin_lock_irqsave(&p->chip->reg_lock, flags); spin_lock(&p->chip->mixer_lock); Also the similar pattern is seen in snd_sb_csp_start(). Although the practical impact is very small (those states aren't triggered in the same running state and this happens only on a real hardware, decades old ISA sound boards -- which must be very difficult to find nowadays), it's a real scenario and has to be fixed. This patch addresses those deadlocks by splitting the locks in snd_sb_csp_start() and snd_sb_csp_stop() for avoiding the nested locks. Reported-by: Jia-Ju Bai Cc: Link: https://lore.kernel.org/r/7b0fcdaf-cd4f-4728-2eae-48c151a92e10@gmail.com Link: https://lore.kernel.org/r/20210716132723.13216-1-tiwai@suse.de Signed-off-by: Takashi Iwai --- sound/isa/sb/sb16_csp.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/sound/isa/sb/sb16_csp.c b/sound/isa/sb/sb16_csp.c index 5bbe6695689d75..7ad8c5f7b664b4 100644 --- a/sound/isa/sb/sb16_csp.c +++ b/sound/isa/sb/sb16_csp.c @@ -816,6 +816,7 @@ static int snd_sb_csp_start(struct snd_sb_csp * p, int sample_width, int channel mixR = snd_sbmixer_read(p->chip, SB_DSP4_PCM_DEV + 1); snd_sbmixer_write(p->chip, SB_DSP4_PCM_DEV, mixL & 0x7); snd_sbmixer_write(p->chip, SB_DSP4_PCM_DEV + 1, mixR & 0x7); + spin_unlock_irqrestore(&p->chip->mixer_lock, flags); spin_lock(&p->chip->reg_lock); set_mode_register(p->chip, 0xc0); /* c0 = STOP */ @@ -855,6 +856,7 @@ static int snd_sb_csp_start(struct snd_sb_csp * p, int sample_width, int channel spin_unlock(&p->chip->reg_lock); /* restore PCM volume */ + spin_lock_irqsave(&p->chip->mixer_lock, flags); snd_sbmixer_write(p->chip, SB_DSP4_PCM_DEV, mixL); snd_sbmixer_write(p->chip, SB_DSP4_PCM_DEV + 1, mixR); spin_unlock_irqrestore(&p->chip->mixer_lock, flags); @@ -880,6 +882,7 @@ static int snd_sb_csp_stop(struct snd_sb_csp * p) mixR = snd_sbmixer_read(p->chip, SB_DSP4_PCM_DEV + 1); snd_sbmixer_write(p->chip, SB_DSP4_PCM_DEV, mixL & 0x7); snd_sbmixer_write(p->chip, SB_DSP4_PCM_DEV + 1, mixR & 0x7); + spin_unlock_irqrestore(&p->chip->mixer_lock, flags); spin_lock(&p->chip->reg_lock); if (p->running & SNDRV_SB_CSP_ST_QSOUND) { @@ -894,6 +897,7 @@ static int snd_sb_csp_stop(struct snd_sb_csp * p) spin_unlock(&p->chip->reg_lock); /* restore PCM volume */ + spin_lock_irqsave(&p->chip->mixer_lock, flags); snd_sbmixer_write(p->chip, SB_DSP4_PCM_DEV, mixL); snd_sbmixer_write(p->chip, SB_DSP4_PCM_DEV + 1, mixR); spin_unlock_irqrestore(&p->chip->mixer_lock, flags); From 33f735f137c6539e3ceceb515cd1e2a644005b49 Mon Sep 17 00:00:00 2001 From: Takashi Iwai Date: Fri, 16 Jul 2021 15:56:00 +0200 Subject: [PATCH 501/714] ALSA: hdmi: Expose all pins on MSI MS-7C94 board The BIOS on MSI Mortar B550m WiFi (MS-7C94) board with AMDGPU seems disabling the other pins than HDMI although it has more outputs including DP. This patch adds the board to the allow list for enabling all pins. Reported-by: Damjan Georgievski Cc: Link: https://lore.kernel.org/r/CAEk1YH4Jd0a8vfZxORVu7qg+Zsc-K+pR187ezNq8QhJBPW4gpw@mail.gmail.com Link: https://lore.kernel.org/r/20210716135600.24176-1-tiwai@suse.de Signed-off-by: Takashi Iwai --- sound/pci/hda/patch_hdmi.c | 1 + 1 file changed, 1 insertion(+) diff --git a/sound/pci/hda/patch_hdmi.c b/sound/pci/hda/patch_hdmi.c index 4b2cc8cb55c49c..84c088912b3c00 100644 --- a/sound/pci/hda/patch_hdmi.c +++ b/sound/pci/hda/patch_hdmi.c @@ -1940,6 +1940,7 @@ static int hdmi_add_cvt(struct hda_codec *codec, hda_nid_t cvt_nid) static const struct snd_pci_quirk force_connect_list[] = { SND_PCI_QUIRK(0x103c, 0x870f, "HP", 1), SND_PCI_QUIRK(0x103c, 0x871a, "HP", 1), + SND_PCI_QUIRK(0x1462, 0xec94, "MS-7C94", 1), {} }; From ec645dc96699ea6c37b6de86c84d7288ea9a4ddf Mon Sep 17 00:00:00 2001 From: Oleksandr Natalenko Date: Sat, 17 Jul 2021 14:33:28 +0200 Subject: [PATCH 502/714] block: increase BLKCG_MAX_POLS After mq-deadline learned to deal with cgroups, the BLKCG_MAX_POLS value became too small for all the elevators to be registered properly. The following issue is seen: ``` calling bfq_init+0x0/0x8b @ 1 blkcg_policy_register: BLKCG_MAX_POLS too small initcall bfq_init+0x0/0x8b returned -28 after 507 usecs ``` which renders BFQ non-functional. Increase BLKCG_MAX_POLS to allow enough space for everyone. Fixes: 08a9ad8bf607 ("block/mq-deadline: Add cgroup support") Link: https://lore.kernel.org/lkml/8988303.mDXGIdCtx8@natalenko.name/ Signed-off-by: Oleksandr Natalenko Link: https://lore.kernel.org/r/20210717123328.945810-1-oleksandr@natalenko.name Signed-off-by: Jens Axboe --- include/linux/blkdev.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h index c454fb446fd0ea..2e12320cb121e4 100644 --- a/include/linux/blkdev.h +++ b/include/linux/blkdev.h @@ -57,7 +57,7 @@ struct blk_keyslot_manager; * Maximum number of blkcg policies allowed to be registered concurrently. * Defined here to simplify include dependency. */ -#define BLKCG_MAX_POLS 5 +#define BLKCG_MAX_POLS 6 typedef void (rq_end_io_fn)(struct request *, blk_status_t); From ae14c63a9f20d49dacfb6f3fa3fb11b3b4eb11bf Mon Sep 17 00:00:00 2001 From: Linus Torvalds Date: Sat, 17 Jul 2021 13:27:00 -0700 Subject: [PATCH 503/714] Revert "mm/slub: use stackdepot to save stack trace in objects" This reverts commit 788691464c29455346dc613a3b43c2fb9e5757a4. It's not clear why, but it causes unexplained problems in entirely unrelated xfs code. The most likely explanation is some slab corruption, possibly triggered due to CONFIG_SLUB_DEBUG_ON. See [1]. It ends up having a few other problems too, like build errors on arch/arc, and Geert reporting it using much more memory on m68k [3] (it probably does so elsewhere too, but it is probably just more noticeable on m68k). The architecture issues (both build and memory use) are likely just because this change effectively force-enabled STACKDEPOT (along with a very bad default value for the stackdepot hash size). But together with the xfs issue, this all smells like "this commit was not ready" to me. Link: https://lore.kernel.org/linux-xfs/YPE3l82acwgI2OiV@infradead.org/ [1] Link: https://lore.kernel.org/lkml/202107150600.LkGNb4Vb-lkp@intel.com/ [2] Link: https://lore.kernel.org/lkml/CAMuHMdW=eoVzM1Re5FVoEN87nKfiLmM2+Ah7eNu2KXEhCvbZyA@mail.gmail.com/ [3] Reported-by: Christoph Hellwig Reported-by: kernel test robot Reported-by: Geert Uytterhoeven Cc: Andrew Morton Cc: Vlastimil Babka Cc: Randy Dunlap Signed-off-by: Linus Torvalds --- init/Kconfig | 1 - mm/slub.c | 79 ++++++++++++++++++++-------------------------------- 2 files changed, 30 insertions(+), 50 deletions(-) diff --git a/init/Kconfig b/init/Kconfig index bb0d6e6262b152..55f9f7738ebb4e 100644 --- a/init/Kconfig +++ b/init/Kconfig @@ -1847,7 +1847,6 @@ config SLUB_DEBUG default y bool "Enable SLUB debugging support" if EXPERT depends on SLUB && SYSFS - select STACKDEPOT if STACKTRACE_SUPPORT help SLUB has extensive debug support features. Disabling these can result in significant savings in code size. This also disables diff --git a/mm/slub.c b/mm/slub.c index e1644ac6ee7bf7..090fa14628f925 100644 --- a/mm/slub.c +++ b/mm/slub.c @@ -26,7 +26,6 @@ #include #include #include -#include #include #include #include @@ -207,8 +206,8 @@ static inline bool kmem_cache_has_cpu_partial(struct kmem_cache *s) #define TRACK_ADDRS_COUNT 16 struct track { unsigned long addr; /* Called from address */ -#ifdef CONFIG_STACKDEPOT - depot_stack_handle_t handle; +#ifdef CONFIG_STACKTRACE + unsigned long addrs[TRACK_ADDRS_COUNT]; /* Called from address */ #endif int cpu; /* Was running on cpu */ int pid; /* Pid context */ @@ -612,27 +611,22 @@ static struct track *get_track(struct kmem_cache *s, void *object, return kasan_reset_tag(p + alloc); } -#ifdef CONFIG_STACKDEPOT -static depot_stack_handle_t save_stack_depot_trace(gfp_t flags) -{ - unsigned long entries[TRACK_ADDRS_COUNT]; - depot_stack_handle_t handle; - unsigned int nr_entries; - - nr_entries = stack_trace_save(entries, ARRAY_SIZE(entries), 4); - handle = stack_depot_save(entries, nr_entries, flags); - return handle; -} -#endif - static void set_track(struct kmem_cache *s, void *object, enum track_item alloc, unsigned long addr) { struct track *p = get_track(s, object, alloc); if (addr) { -#ifdef CONFIG_STACKDEPOT - p->handle = save_stack_depot_trace(GFP_NOWAIT); +#ifdef CONFIG_STACKTRACE + unsigned int nr_entries; + + metadata_access_enable(); + nr_entries = stack_trace_save(kasan_reset_tag(p->addrs), + TRACK_ADDRS_COUNT, 3); + metadata_access_disable(); + + if (nr_entries < TRACK_ADDRS_COUNT) + p->addrs[nr_entries] = 0; #endif p->addr = addr; p->cpu = smp_processor_id(); @@ -659,19 +653,14 @@ static void print_track(const char *s, struct track *t, unsigned long pr_time) pr_err("%s in %pS age=%lu cpu=%u pid=%d\n", s, (void *)t->addr, pr_time - t->when, t->cpu, t->pid); -#ifdef CONFIG_STACKDEPOT +#ifdef CONFIG_STACKTRACE { - depot_stack_handle_t handle; - unsigned long *entries; - unsigned int nr_entries; - - handle = READ_ONCE(t->handle); - if (!handle) { - pr_err("object allocation/free stack trace missing\n"); - } else { - nr_entries = stack_depot_fetch(handle, &entries); - stack_trace_print(entries, nr_entries, 0); - } + int i; + for (i = 0; i < TRACK_ADDRS_COUNT; i++) + if (t->addrs[i]) + pr_err("\t%pS\n", (void *)t->addrs[i]); + else + break; } #endif } @@ -4045,26 +4034,18 @@ void kmem_obj_info(struct kmem_obj_info *kpp, void *object, struct page *page) objp = fixup_red_left(s, objp); trackp = get_track(s, objp, TRACK_ALLOC); kpp->kp_ret = (void *)trackp->addr; -#ifdef CONFIG_STACKDEPOT - { - depot_stack_handle_t handle; - unsigned long *entries; - unsigned int nr_entries; - - handle = READ_ONCE(trackp->handle); - if (handle) { - nr_entries = stack_depot_fetch(handle, &entries); - for (i = 0; i < KS_ADDRS_COUNT && i < nr_entries; i++) - kpp->kp_stack[i] = (void *)entries[i]; - } +#ifdef CONFIG_STACKTRACE + for (i = 0; i < KS_ADDRS_COUNT && i < TRACK_ADDRS_COUNT; i++) { + kpp->kp_stack[i] = (void *)trackp->addrs[i]; + if (!kpp->kp_stack[i]) + break; + } - trackp = get_track(s, objp, TRACK_FREE); - handle = READ_ONCE(trackp->handle); - if (handle) { - nr_entries = stack_depot_fetch(handle, &entries); - for (i = 0; i < KS_ADDRS_COUNT && i < nr_entries; i++) - kpp->kp_free_stack[i] = (void *)entries[i]; - } + trackp = get_track(s, objp, TRACK_FREE); + for (i = 0; i < KS_ADDRS_COUNT && i < TRACK_ADDRS_COUNT; i++) { + kpp->kp_free_stack[i] = (void *)trackp->addrs[i]; + if (!kpp->kp_free_stack[i]) + break; } #endif #endif From e0fa7ab42232e742dcb3de9f3c1f6127b5adc019 Mon Sep 17 00:00:00 2001 From: Riccardo Mancini Date: Thu, 15 Jul 2021 18:07:25 +0200 Subject: [PATCH 504/714] perf probe-file: Delete namelist in del_events() on the error path ASan reports some memory leaks when running: # perf test "42: BPF filter" This second leak is caused by a strlist not being dellocated on error inside probe_file__del_events. This patch adds a goto label before the deallocation and makes the error path jump to it. Signed-off-by: Riccardo Mancini Fixes: e7895e422e4da63d ("perf probe: Split del_perf_probe_events()") Cc: Ian Rogers Cc: Jiri Olsa Cc: Mark Rutland Cc: Namhyung Kim Cc: Peter Zijlstra Link: http://lore.kernel.org/lkml/174963c587ae77fa108af794669998e4ae558338.1626343282.git.rickyman7@gmail.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/probe-file.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tools/perf/util/probe-file.c b/tools/perf/util/probe-file.c index f9a6cbcd641501..3d50de3217d50a 100644 --- a/tools/perf/util/probe-file.c +++ b/tools/perf/util/probe-file.c @@ -377,11 +377,11 @@ int probe_file__del_events(int fd, struct strfilter *filter) ret = probe_file__get_events(fd, filter, namelist); if (ret < 0) - return ret; + goto out; ret = probe_file__del_strlist(fd, namelist); +out: strlist__delete(namelist); - return ret; } From d4b3eedce151e63932ce4a00f1d0baa340a8b907 Mon Sep 17 00:00:00 2001 From: Riccardo Mancini Date: Fri, 16 Jul 2021 16:11:20 +0200 Subject: [PATCH 505/714] perf data: Close all files in close_dir() When using 'perf report' in directory mode, the first file is not closed on exit, causing a memory leak. The problem is caused by the iterating variable never reaching 0. Fixes: 145520631130bd64 ("perf data: Add perf_data__(create_dir|close_dir) functions") Signed-off-by: Riccardo Mancini Acked-by: Namhyung Kim Cc: Alexander Shishkin Cc: Ian Rogers Cc: Jiri Olsa Cc: Mark Rutland Cc: Peter Zijlstra Cc: Zhen Lei Link: http://lore.kernel.org/lkml/20210716141122.858082-1-rickyman7@gmail.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/data.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tools/perf/util/data.c b/tools/perf/util/data.c index a9c102e8e3c005..f5d260b1df4d1c 100644 --- a/tools/perf/util/data.c +++ b/tools/perf/util/data.c @@ -20,7 +20,7 @@ static void close_dir(struct perf_data_file *files, int nr) { - while (--nr >= 1) { + while (--nr >= 0) { close(files[nr].fd); zfree(&files[nr].path); } From 22a665513b34df458da1d3b7ee0b919c3f3d4653 Mon Sep 17 00:00:00 2001 From: Yang Jihong Date: Thu, 15 Jul 2021 14:37:23 +0800 Subject: [PATCH 506/714] perf probe: Fix add event failure when running 32-bit perf in a 64-bit kernel The "address" member of "struct probe_trace_point" uses long data type. If kernel is 64-bit and perf program is 32-bit, size of "address" variable is 32 bits. As a result, upper 32 bits of address read from kernel are truncated, an error occurs during address comparison in kprobe_warn_out_range(). Before: # perf probe -a schedule schedule is out of .text, skip it. Error: Failed to add events. Solution: Change data type of "address" variable to u64 and change corresponding address printing and value assignment. After: # perf.new.new probe -a schedule Added new event: probe:schedule (on schedule) You can now use it in all perf tools, such as: perf record -e probe:schedule -aR sleep 1 # perf probe -l probe:schedule (on schedule@kernel/sched/core.c) # perf record -e probe:schedule -aR sleep 1 [ perf record: Woken up 1 times to write data ] [ perf record: Captured and wrote 0.156 MB perf.data (1366 samples) ] # perf report --stdio # To display the perf.data header info, please use --header/--header-only options. # # # Total Lost Samples: 0 # # Samples: 1K of event 'probe:schedule' # Event count (approx.): 1366 # # Overhead Command Shared Object Symbol # ........ ............... ................. ............ # 6.22% migration/0 [kernel.kallsyms] [k] schedule 6.22% migration/1 [kernel.kallsyms] [k] schedule 6.22% migration/2 [kernel.kallsyms] [k] schedule 6.22% migration/3 [kernel.kallsyms] [k] schedule 6.15% migration/10 [kernel.kallsyms] [k] schedule 6.15% migration/11 [kernel.kallsyms] [k] schedule 6.15% migration/12 [kernel.kallsyms] [k] schedule 6.15% migration/13 [kernel.kallsyms] [k] schedule 6.15% migration/14 [kernel.kallsyms] [k] schedule 6.15% migration/15 [kernel.kallsyms] [k] schedule 6.15% migration/4 [kernel.kallsyms] [k] schedule 6.15% migration/5 [kernel.kallsyms] [k] schedule 6.15% migration/6 [kernel.kallsyms] [k] schedule 6.15% migration/7 [kernel.kallsyms] [k] schedule 6.15% migration/8 [kernel.kallsyms] [k] schedule 6.15% migration/9 [kernel.kallsyms] [k] schedule 0.22% rcu_sched [kernel.kallsyms] [k] schedule ... # # (Cannot load tips.txt file, please install perf!) # Signed-off-by: Yang Jihong Acked-by: Masami Hiramatsu Cc: Alexander Shishkin Cc: Frank Ch. Eigler Cc: Ian Rogers Cc: Jianlin Lv Cc: Jin Yao Cc: Jiri Olsa Cc: Li Huafei Cc: Mark Rutland Cc: Namhyung Kim Cc: Peter Zijlstra Cc: Ravi Bangoria Cc: Srikar Dronamraju Link: http://lore.kernel.org/lkml/20210715063723.11926-1-yangjihong1@huawei.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/dwarf-aux.c | 8 +++--- tools/perf/util/dwarf-aux.h | 2 +- tools/perf/util/probe-event.c | 49 ++++++++++++++++------------------ tools/perf/util/probe-event.h | 4 +-- tools/perf/util/probe-finder.c | 15 +++++------ tools/perf/util/probe-finder.h | 2 +- 6 files changed, 38 insertions(+), 42 deletions(-) diff --git a/tools/perf/util/dwarf-aux.c b/tools/perf/util/dwarf-aux.c index 7d2ba8419b0c46..609ca16715018c 100644 --- a/tools/perf/util/dwarf-aux.c +++ b/tools/perf/util/dwarf-aux.c @@ -113,14 +113,14 @@ static Dwarf_Line *cu_getsrc_die(Dwarf_Die *cu_die, Dwarf_Addr addr) * * Find a line number and file name for @addr in @cu_die. */ -int cu_find_lineinfo(Dwarf_Die *cu_die, unsigned long addr, - const char **fname, int *lineno) +int cu_find_lineinfo(Dwarf_Die *cu_die, Dwarf_Addr addr, + const char **fname, int *lineno) { Dwarf_Line *line; Dwarf_Die die_mem; Dwarf_Addr faddr; - if (die_find_realfunc(cu_die, (Dwarf_Addr)addr, &die_mem) + if (die_find_realfunc(cu_die, addr, &die_mem) && die_entrypc(&die_mem, &faddr) == 0 && faddr == addr) { *fname = dwarf_decl_file(&die_mem); @@ -128,7 +128,7 @@ int cu_find_lineinfo(Dwarf_Die *cu_die, unsigned long addr, goto out; } - line = cu_getsrc_die(cu_die, (Dwarf_Addr)addr); + line = cu_getsrc_die(cu_die, addr); if (line && dwarf_lineno(line, lineno) == 0) { *fname = dwarf_linesrc(line, NULL, NULL); if (!*fname) diff --git a/tools/perf/util/dwarf-aux.h b/tools/perf/util/dwarf-aux.h index cb99646843a941..7ee0fa19b5c490 100644 --- a/tools/perf/util/dwarf-aux.h +++ b/tools/perf/util/dwarf-aux.h @@ -19,7 +19,7 @@ const char *cu_find_realpath(Dwarf_Die *cu_die, const char *fname); const char *cu_get_comp_dir(Dwarf_Die *cu_die); /* Get a line number and file name for given address */ -int cu_find_lineinfo(Dwarf_Die *cudie, unsigned long addr, +int cu_find_lineinfo(Dwarf_Die *cudie, Dwarf_Addr addr, const char **fname, int *lineno); /* Walk on functions at given address */ diff --git a/tools/perf/util/probe-event.c b/tools/perf/util/probe-event.c index e05750cea34c3a..b2a02c9ab8ea9f 100644 --- a/tools/perf/util/probe-event.c +++ b/tools/perf/util/probe-event.c @@ -239,8 +239,8 @@ static void clear_probe_trace_events(struct probe_trace_event *tevs, int ntevs) clear_probe_trace_event(tevs + i); } -static bool kprobe_blacklist__listed(unsigned long address); -static bool kprobe_warn_out_range(const char *symbol, unsigned long address) +static bool kprobe_blacklist__listed(u64 address); +static bool kprobe_warn_out_range(const char *symbol, u64 address) { struct map *map; bool ret = false; @@ -400,8 +400,7 @@ static int find_alternative_probe_point(struct debuginfo *dinfo, pr_debug("Symbol %s address found : %" PRIx64 "\n", pp->function, address); - ret = debuginfo__find_probe_point(dinfo, (unsigned long)address, - result); + ret = debuginfo__find_probe_point(dinfo, address, result); if (ret <= 0) ret = (!ret) ? -ENOENT : ret; else { @@ -589,7 +588,7 @@ static void debuginfo_cache__exit(void) } -static int get_text_start_address(const char *exec, unsigned long *address, +static int get_text_start_address(const char *exec, u64 *address, struct nsinfo *nsi) { Elf *elf; @@ -634,7 +633,7 @@ static int find_perf_probe_point_from_dwarf(struct probe_trace_point *tp, bool is_kprobe) { struct debuginfo *dinfo = NULL; - unsigned long stext = 0; + u64 stext = 0; u64 addr = tp->address; int ret = -ENOENT; @@ -662,8 +661,7 @@ static int find_perf_probe_point_from_dwarf(struct probe_trace_point *tp, dinfo = debuginfo_cache__open(tp->module, verbose <= 0); if (dinfo) - ret = debuginfo__find_probe_point(dinfo, - (unsigned long)addr, pp); + ret = debuginfo__find_probe_point(dinfo, addr, pp); else ret = -ENOENT; @@ -678,7 +676,7 @@ static int find_perf_probe_point_from_dwarf(struct probe_trace_point *tp, /* Adjust symbol name and address */ static int post_process_probe_trace_point(struct probe_trace_point *tp, - struct map *map, unsigned long offs) + struct map *map, u64 offs) { struct symbol *sym; u64 addr = tp->address - offs; @@ -721,7 +719,7 @@ post_process_offline_probe_trace_events(struct probe_trace_event *tevs, int ntevs, const char *pathname) { struct map *map; - unsigned long stext = 0; + u64 stext = 0; int i, ret = 0; /* Prepare a map for offline binary */ @@ -747,7 +745,7 @@ static int add_exec_to_probe_trace_events(struct probe_trace_event *tevs, struct nsinfo *nsi) { int i, ret = 0; - unsigned long stext = 0; + u64 stext = 0; if (!exec) return 0; @@ -792,7 +790,7 @@ post_process_module_probe_trace_events(struct probe_trace_event *tevs, mod_name = find_module_name(module); for (i = 0; i < ntevs; i++) { ret = post_process_probe_trace_point(&tevs[i].point, - map, (unsigned long)text_offs); + map, text_offs); if (ret < 0) break; tevs[i].point.module = @@ -1536,7 +1534,7 @@ static int parse_perf_probe_point(char *arg, struct perf_probe_event *pev) * so tmp[1] should always valid (but could be '\0'). */ if (tmp && !strncmp(tmp, "0x", 2)) { - pp->abs_address = strtoul(pp->function, &tmp, 0); + pp->abs_address = strtoull(pp->function, &tmp, 0); if (*tmp != '\0') { semantic_error("Invalid absolute address.\n"); return -EINVAL; @@ -1911,7 +1909,7 @@ int parse_probe_trace_command(const char *cmd, struct probe_trace_event *tev) argv[i] = NULL; argc -= 1; } else - tp->address = strtoul(fmt1_str, NULL, 0); + tp->address = strtoull(fmt1_str, NULL, 0); } else { /* Only the symbol-based probe has offset */ tp->symbol = strdup(fmt1_str); @@ -2157,7 +2155,7 @@ synthesize_uprobe_trace_def(struct probe_trace_point *tp, struct strbuf *buf) return -EINVAL; /* Use the tp->address for uprobes */ - err = strbuf_addf(buf, "%s:0x%lx", tp->module, tp->address); + err = strbuf_addf(buf, "%s:0x%" PRIx64, tp->module, tp->address); if (err >= 0 && tp->ref_ctr_offset) { if (!uprobe_ref_ctr_is_supported()) @@ -2172,7 +2170,7 @@ synthesize_kprobe_trace_def(struct probe_trace_point *tp, struct strbuf *buf) { if (!strncmp(tp->symbol, "0x", 2)) { /* Absolute address. See try_to_find_absolute_address() */ - return strbuf_addf(buf, "%s%s0x%lx", tp->module ?: "", + return strbuf_addf(buf, "%s%s0x%" PRIx64, tp->module ?: "", tp->module ? ":" : "", tp->address); } else { return strbuf_addf(buf, "%s%s%s+%lu", tp->module ?: "", @@ -2271,7 +2269,7 @@ static int convert_to_perf_probe_point(struct probe_trace_point *tp, pp->function = strdup(tp->symbol); pp->offset = tp->offset; } else { - ret = e_snprintf(buf, 128, "0x%" PRIx64, (u64)tp->address); + ret = e_snprintf(buf, 128, "0x%" PRIx64, tp->address); if (ret < 0) return ret; pp->function = strdup(buf); @@ -2452,8 +2450,8 @@ void clear_probe_trace_event(struct probe_trace_event *tev) struct kprobe_blacklist_node { struct list_head list; - unsigned long start; - unsigned long end; + u64 start; + u64 end; char *symbol; }; @@ -2498,7 +2496,7 @@ static int kprobe_blacklist__load(struct list_head *blacklist) } INIT_LIST_HEAD(&node->list); list_add_tail(&node->list, blacklist); - if (sscanf(buf, "0x%lx-0x%lx", &node->start, &node->end) != 2) { + if (sscanf(buf, "0x%" PRIx64 "-0x%" PRIx64, &node->start, &node->end) != 2) { ret = -EINVAL; break; } @@ -2514,7 +2512,7 @@ static int kprobe_blacklist__load(struct list_head *blacklist) ret = -ENOMEM; break; } - pr_debug2("Blacklist: 0x%lx-0x%lx, %s\n", + pr_debug2("Blacklist: 0x%" PRIx64 "-0x%" PRIx64 ", %s\n", node->start, node->end, node->symbol); ret++; } @@ -2526,8 +2524,7 @@ static int kprobe_blacklist__load(struct list_head *blacklist) } static struct kprobe_blacklist_node * -kprobe_blacklist__find_by_address(struct list_head *blacklist, - unsigned long address) +kprobe_blacklist__find_by_address(struct list_head *blacklist, u64 address) { struct kprobe_blacklist_node *node; @@ -2555,7 +2552,7 @@ static void kprobe_blacklist__release(void) kprobe_blacklist__delete(&kprobe_blacklist); } -static bool kprobe_blacklist__listed(unsigned long address) +static bool kprobe_blacklist__listed(u64 address) { return !!kprobe_blacklist__find_by_address(&kprobe_blacklist, address); } @@ -3223,7 +3220,7 @@ static int try_to_find_absolute_address(struct perf_probe_event *pev, * In __add_probe_trace_events, a NULL symbol is interpreted as * invalid. */ - if (asprintf(&tp->symbol, "0x%lx", tp->address) < 0) + if (asprintf(&tp->symbol, "0x%" PRIx64, tp->address) < 0) goto errout; /* For kprobe, check range */ @@ -3234,7 +3231,7 @@ static int try_to_find_absolute_address(struct perf_probe_event *pev, goto errout; } - if (asprintf(&tp->realname, "abs_%lx", tp->address) < 0) + if (asprintf(&tp->realname, "abs_%" PRIx64, tp->address) < 0) goto errout; if (pev->target) { diff --git a/tools/perf/util/probe-event.h b/tools/perf/util/probe-event.h index 65769d7949a317..8ad5b1579f1d37 100644 --- a/tools/perf/util/probe-event.h +++ b/tools/perf/util/probe-event.h @@ -33,7 +33,7 @@ struct probe_trace_point { char *module; /* Module name */ unsigned long offset; /* Offset from symbol */ unsigned long ref_ctr_offset; /* SDT reference counter offset */ - unsigned long address; /* Actual address of the trace point */ + u64 address; /* Actual address of the trace point */ bool retprobe; /* Return probe flag */ }; @@ -70,7 +70,7 @@ struct perf_probe_point { bool retprobe; /* Return probe flag */ char *lazy_line; /* Lazy matching pattern */ unsigned long offset; /* Offset from function entry */ - unsigned long abs_address; /* Absolute address of the point */ + u64 abs_address; /* Absolute address of the point */ }; /* Perf probe probing argument field chain */ diff --git a/tools/perf/util/probe-finder.c b/tools/perf/util/probe-finder.c index 02ef0d78053b7f..50d861a80f5726 100644 --- a/tools/perf/util/probe-finder.c +++ b/tools/perf/util/probe-finder.c @@ -668,7 +668,7 @@ static int convert_to_trace_point(Dwarf_Die *sp_die, Dwfl_Module *mod, } tp->offset = (unsigned long)(paddr - eaddr); - tp->address = (unsigned long)paddr; + tp->address = paddr; tp->symbol = strdup(symbol); if (!tp->symbol) return -ENOMEM; @@ -1707,7 +1707,7 @@ int debuginfo__get_text_offset(struct debuginfo *dbg, Dwarf_Addr *offs, } /* Reverse search */ -int debuginfo__find_probe_point(struct debuginfo *dbg, unsigned long addr, +int debuginfo__find_probe_point(struct debuginfo *dbg, u64 addr, struct perf_probe_point *ppt) { Dwarf_Die cudie, spdie, indie; @@ -1720,14 +1720,14 @@ int debuginfo__find_probe_point(struct debuginfo *dbg, unsigned long addr, addr += baseaddr; /* Find cu die */ if (!dwarf_addrdie(dbg->dbg, (Dwarf_Addr)addr, &cudie)) { - pr_warning("Failed to find debug information for address %lx\n", + pr_warning("Failed to find debug information for address %" PRIx64 "\n", addr); ret = -EINVAL; goto end; } /* Find a corresponding line (filename and lineno) */ - cu_find_lineinfo(&cudie, addr, &fname, &lineno); + cu_find_lineinfo(&cudie, (Dwarf_Addr)addr, &fname, &lineno); /* Don't care whether it failed or not */ /* Find a corresponding function (name, baseline and baseaddr) */ @@ -1742,7 +1742,7 @@ int debuginfo__find_probe_point(struct debuginfo *dbg, unsigned long addr, } fname = dwarf_decl_file(&spdie); - if (addr == (unsigned long)baseaddr) { + if (addr == baseaddr) { /* Function entry - Relative line number is 0 */ lineno = baseline; goto post; @@ -1788,7 +1788,7 @@ int debuginfo__find_probe_point(struct debuginfo *dbg, unsigned long addr, if (lineno) ppt->line = lineno - baseline; else if (basefunc) { - ppt->offset = addr - (unsigned long)baseaddr; + ppt->offset = addr - baseaddr; func = basefunc; } @@ -1828,8 +1828,7 @@ static int line_range_add_line(const char *src, unsigned int lineno, } static int line_range_walk_cb(const char *fname, int lineno, - Dwarf_Addr addr __maybe_unused, - void *data) + Dwarf_Addr addr, void *data) { struct line_finder *lf = data; const char *__fname; diff --git a/tools/perf/util/probe-finder.h b/tools/perf/util/probe-finder.h index 2febb587567895..8bc1c80d3c1c0b 100644 --- a/tools/perf/util/probe-finder.h +++ b/tools/perf/util/probe-finder.h @@ -46,7 +46,7 @@ int debuginfo__find_trace_events(struct debuginfo *dbg, struct probe_trace_event **tevs); /* Find a perf_probe_point from debuginfo */ -int debuginfo__find_probe_point(struct debuginfo *dbg, unsigned long addr, +int debuginfo__find_probe_point(struct debuginfo *dbg, u64 addr, struct perf_probe_point *ppt); int debuginfo__get_text_offset(struct debuginfo *dbg, Dwarf_Addr *offs, From b0f008551f0bf4d5f6db9b5f0e071b02790d6a2e Mon Sep 17 00:00:00 2001 From: Yang Jihong Date: Tue, 13 Jul 2021 19:23:58 +0800 Subject: [PATCH 507/714] perf sched: Fix record failure when CONFIG_SCHEDSTATS is not set The tracepoints trace_sched_stat_{wait, sleep, iowait} are not exposed to user if CONFIG_SCHEDSTATS is not set, "perf sched record" records the three events. As a result, the command fails. Before: #perf sched record sleep 1 event syntax error: 'sched:sched_stat_wait' \___ unknown tracepoint Error: File /sys/kernel/tracing/events/sched/sched_stat_wait not found. Hint: Perhaps this kernel misses some CONFIG_ setting to enable this feature?. Run 'perf list' for a list of valid events Usage: perf record [] [] or: perf record [] -- [] -e, --event event selector. use 'perf list' to list available events Solution: Check whether schedstat tracepoints are exposed. If no, these events are not recorded. After: # perf sched record sleep 1 [ perf record: Woken up 1 times to write data ] [ perf record: Captured and wrote 0.163 MB perf.data (1091 samples) ] # perf sched report run measurement overhead: 4736 nsecs sleep measurement overhead: 9059979 nsecs the run test took 999854 nsecs the sleep test took 8945271 nsecs nr_run_events: 716 nr_sleep_events: 785 nr_wakeup_events: 0 ... ------------------------------------------------------------ Fixes: 2a09b5de235a6 ("sched/fair: do not expose some tracepoints to user if CONFIG_SCHEDSTATS is not set") Signed-off-by: Yang Jihong Cc: Alexander Shishkin Cc: Jiri Olsa Cc: Mark Rutland Cc: Namhyung Kim Cc: Peter Zijlstra Cc: Steven Rostedt (VMware) Cc: Yafang Shao Link: http://lore.kernel.org/lkml/20210713112358.194693-1-yangjihong1@huawei.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/builtin-sched.c | 33 +++++++++++++++++++++++++++++---- 1 file changed, 29 insertions(+), 4 deletions(-) diff --git a/tools/perf/builtin-sched.c b/tools/perf/builtin-sched.c index aaf1cde476c951..1ff10d4bccf3cc 100644 --- a/tools/perf/builtin-sched.c +++ b/tools/perf/builtin-sched.c @@ -3335,6 +3335,16 @@ static void setup_sorting(struct perf_sched *sched, const struct option *options sort_dimension__add("pid", &sched->cmp_pid); } +static bool schedstat_events_exposed(void) +{ + /* + * Select "sched:sched_stat_wait" event to check + * whether schedstat tracepoints are exposed. + */ + return IS_ERR(trace_event__tp_format("sched", "sched_stat_wait")) ? + false : true; +} + static int __cmd_record(int argc, const char **argv) { unsigned int rec_argc, i, j; @@ -3346,21 +3356,33 @@ static int __cmd_record(int argc, const char **argv) "-m", "1024", "-c", "1", "-e", "sched:sched_switch", - "-e", "sched:sched_stat_wait", - "-e", "sched:sched_stat_sleep", - "-e", "sched:sched_stat_iowait", "-e", "sched:sched_stat_runtime", "-e", "sched:sched_process_fork", "-e", "sched:sched_wakeup_new", "-e", "sched:sched_migrate_task", }; + + /* + * The tracepoints trace_sched_stat_{wait, sleep, iowait} + * are not exposed to user if CONFIG_SCHEDSTATS is not set, + * to prevent "perf sched record" execution failure, determine + * whether to record schedstat events according to actual situation. + */ + const char * const schedstat_args[] = { + "-e", "sched:sched_stat_wait", + "-e", "sched:sched_stat_sleep", + "-e", "sched:sched_stat_iowait", + }; + unsigned int schedstat_argc = schedstat_events_exposed() ? + ARRAY_SIZE(schedstat_args) : 0; + struct tep_event *waking_event; /* * +2 for either "-e", "sched:sched_wakeup" or * "-e", "sched:sched_waking" */ - rec_argc = ARRAY_SIZE(record_args) + 2 + argc - 1; + rec_argc = ARRAY_SIZE(record_args) + 2 + schedstat_argc + argc - 1; rec_argv = calloc(rec_argc + 1, sizeof(char *)); if (rec_argv == NULL) @@ -3376,6 +3398,9 @@ static int __cmd_record(int argc, const char **argv) else rec_argv[i++] = strdup("sched:sched_wakeup"); + for (j = 0; j < schedstat_argc; j++) + rec_argv[i++] = strdup(schedstat_args[j]); + for (j = 1; j < (unsigned int)argc; j++, i++) rec_argv[i] = argv[j]; From 5df99bec210a2cf89dd91e52f0d0a714bf4cd96a Mon Sep 17 00:00:00 2001 From: Mikulas Patocka Date: Mon, 12 Jul 2021 15:35:46 -0400 Subject: [PATCH 508/714] scripts/setlocalversion: fix a bug when LOCALVERSION is empty The commit 042da426f8eb ("scripts/setlocalversion: simplify the short version part") reduces indentation. Unfortunately, it also changes behavior in a subtle way - if the user has empty "LOCALVERSION" variable, the plus sign is appended to the kernel version. It wasn't appended before. This patch reverts to the old behavior - we append the plus sign only if the LOCALVERSION variable is not set. Fixes: 042da426f8eb ("scripts/setlocalversion: simplify the short version part") Signed-off-by: Mikulas Patocka Signed-off-by: Masahiro Yamada --- scripts/setlocalversion | 13 ++++++++----- 1 file changed, 8 insertions(+), 5 deletions(-) diff --git a/scripts/setlocalversion b/scripts/setlocalversion index 151f04971faad7..6b54e46a0f1245 100755 --- a/scripts/setlocalversion +++ b/scripts/setlocalversion @@ -131,11 +131,14 @@ res="${res}${CONFIG_LOCALVERSION}${LOCALVERSION}" if test "$CONFIG_LOCALVERSION_AUTO" = "y"; then # full scm version string res="$res$(scm_version)" -elif [ -z "${LOCALVERSION}" ]; then - # append a plus sign if the repository is not in a clean - # annotated or signed tagged state (as git describe only - # looks at signed or annotated tags - git tag -a/-s) and - # LOCALVERSION= is not specified +elif [ "${LOCALVERSION+set}" != "set" ]; then + # If the variable LOCALVERSION is not set, append a plus + # sign if the repository is not in a clean annotated or + # signed tagged state (as git describe only looks at signed + # or annotated tags - git tag -a/-s). + # + # If the variable LOCALVERSION is set (including being set + # to an empty string), we don't want to append a plus sign. scm=$(scm_version --short) res="$res${scm:++}" fi From d952cfaf0cffdbbb0433c67206b645131f17ca5f Mon Sep 17 00:00:00 2001 From: Masahiro Yamada Date: Wed, 14 Jul 2021 13:23:49 +0900 Subject: [PATCH 509/714] kbuild: do not suppress Kconfig prompts for silent build When a new CONFIG option is available, Kbuild shows a prompt to get the user input. $ make [ snip ] Core Scheduling for SMT (SCHED_CORE) [N/y/?] (NEW) This is the only interactive place in the build process. Commit 174a1dcc9642 ("kbuild: sink stdout from cmd for silent build") suppressed Kconfig prompts as well because syncconfig is invoked by the 'cmd' macro. You cannot notice the fact that Kconfig is waiting for the user input. Use 'kecho' to show the equivalent short log without suppressing stdout from sub-make. Fixes: 174a1dcc9642 ("kbuild: sink stdout from cmd for silent build") Reported-by: Tetsuo Handa Signed-off-by: Masahiro Yamada Tested-by: Tetsuo Handa --- Makefile | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/Makefile b/Makefile index c3f9bd191b8941..e264c5440f7096 100644 --- a/Makefile +++ b/Makefile @@ -728,11 +728,12 @@ $(KCONFIG_CONFIG): # This exploits the 'multi-target pattern rule' trick. # The syncconfig should be executed only once to make all the targets. # (Note: use the grouped target '&:' when we bump to GNU Make 4.3) -quiet_cmd_syncconfig = SYNC $@ - cmd_syncconfig = $(MAKE) -f $(srctree)/Makefile syncconfig - +# +# Do not use $(call cmd,...) here. That would suppress prompts from syncconfig, +# so you cannot notice that Kconfig is waiting for the user input. %/config/auto.conf %/config/auto.conf.cmd %/generated/autoconf.h: $(KCONFIG_CONFIG) - +$(call cmd,syncconfig) + $(Q)$(kecho) " SYNC $@" + $(Q)$(MAKE) -f $(srctree)/Makefile syncconfig else # !may-sync-config # External modules and some install targets need include/generated/autoconf.h # and include/config/auto.conf but do not care if they are up-to-date. From 1d11053dc63094075bf9e4809fffd3bb5e72f9a6 Mon Sep 17 00:00:00 2001 From: Lecopzer Chen Date: Thu, 15 Jul 2021 15:37:16 +0800 Subject: [PATCH 510/714] Kbuild: lto: fix module versionings mismatch in GNU make 3.X When building modules(CONFIG_...=m), I found some of module versions are incorrect and set to 0. This can be found in build log for first clean build which shows WARNING: EXPORT symbol "XXXX" [drivers/XXX/XXX.ko] version generation failed, symbol will not be versioned. But in second build(incremental build), the WARNING disappeared and the module version becomes valid CRC and make someone who want to change modules without updating kernel image can't insert their modules. The problematic code is + $(foreach n, $(filter-out FORCE,$^), \ + $(if $(wildcard $(n).symversions), \ + ; cat $(n).symversions >> $@.symversions)) For example: rm -f fs/notify/built-in.a.symversions ; rm -f fs/notify/built-in.a; \ llvm-ar cDPrST fs/notify/built-in.a fs/notify/fsnotify.o \ fs/notify/notification.o fs/notify/group.o ... `foreach n` shows nothing to `cat` into $(n).symversions because `if $(wildcard $(n).symversions)` return nothing, but actually they do exist during this line was executed. -rw-r--r-- 1 root root 168580 Jun 13 19:10 fs/notify/fsnotify.o -rw-r--r-- 1 root root 111 Jun 13 19:10 fs/notify/fsnotify.o.symversions The reason is the $(n).symversions are generated at runtime, but Makefile wildcard function expends and checks the file exist or not during parsing the Makefile. Thus fix this by use `test` shell command to check the file existence in runtime. Rebase from both: 1. [https://lore.kernel.org/lkml/20210616080252.32046-1-lecopzer.chen@mediatek.com/] 2. [https://lore.kernel.org/lkml/20210702032943.7865-1-lecopzer.chen@mediatek.com/] Fixes: 38e891849003 ("kbuild: lto: fix module versioning") Co-developed-by: Sami Tolvanen Signed-off-by: Lecopzer Chen Signed-off-by: Masahiro Yamada --- scripts/Makefile.build | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/scripts/Makefile.build b/scripts/Makefile.build index 10b2f2380d6fb7..02197cb8e3a771 100644 --- a/scripts/Makefile.build +++ b/scripts/Makefile.build @@ -386,7 +386,7 @@ ifeq ($(CONFIG_LTO_CLANG) $(CONFIG_MODVERSIONS),y y) cmd_update_lto_symversions = \ rm -f $@.symversions \ $(foreach n, $(filter-out FORCE,$^), \ - $(if $(wildcard $(n).symversions), \ + $(if $(shell test -s $(n).symversions && echo y), \ ; cat $(n).symversions >> $@.symversions)) else cmd_update_lto_symversions = echo >/dev/null From 5e60f363b38fd40e4d8838b5d6f4d4ecee92c777 Mon Sep 17 00:00:00 2001 From: Robert Richter Date: Thu, 15 Jul 2021 11:26:02 +0200 Subject: [PATCH 511/714] Documentation: Fix intiramfs script name Documentation was not changed when renaming the script in commit 80e715a06c2d ("initramfs: rename gen_initramfs_list.sh to gen_initramfs.sh"). Fixing this. Basically does: $ sed -i -e s/gen_initramfs_list.sh/gen_initramfs.sh/g $(git grep -l gen_initramfs_list.sh) Fixes: 80e715a06c2d ("initramfs: rename gen_initramfs_list.sh to gen_initramfs.sh") Signed-off-by: Robert Richter Signed-off-by: Masahiro Yamada --- .../early-userspace/early_userspace_support.rst | 8 ++++---- Documentation/filesystems/ramfs-rootfs-initramfs.rst | 2 +- 2 files changed, 5 insertions(+), 5 deletions(-) diff --git a/Documentation/driver-api/early-userspace/early_userspace_support.rst b/Documentation/driver-api/early-userspace/early_userspace_support.rst index 8a58c61932ff5f..61bdeac1bae541 100644 --- a/Documentation/driver-api/early-userspace/early_userspace_support.rst +++ b/Documentation/driver-api/early-userspace/early_userspace_support.rst @@ -69,17 +69,17 @@ early userspace image can be built by an unprivileged user. As a technical note, when directories and files are specified, the entire CONFIG_INITRAMFS_SOURCE is passed to -usr/gen_initramfs_list.sh. This means that CONFIG_INITRAMFS_SOURCE +usr/gen_initramfs.sh. This means that CONFIG_INITRAMFS_SOURCE can really be interpreted as any legal argument to -gen_initramfs_list.sh. If a directory is specified as an argument then +gen_initramfs.sh. If a directory is specified as an argument then the contents are scanned, uid/gid translation is performed, and usr/gen_init_cpio file directives are output. If a directory is -specified as an argument to usr/gen_initramfs_list.sh then the +specified as an argument to usr/gen_initramfs.sh then the contents of the file are simply copied to the output. All of the output directives from directory scanning and file contents copying are processed by usr/gen_init_cpio. -See also 'usr/gen_initramfs_list.sh -h'. +See also 'usr/gen_initramfs.sh -h'. Where's this all leading? ========================= diff --git a/Documentation/filesystems/ramfs-rootfs-initramfs.rst b/Documentation/filesystems/ramfs-rootfs-initramfs.rst index 4598b0d90b6079..164960631925d0 100644 --- a/Documentation/filesystems/ramfs-rootfs-initramfs.rst +++ b/Documentation/filesystems/ramfs-rootfs-initramfs.rst @@ -170,7 +170,7 @@ Documentation/driver-api/early-userspace/early_userspace_support.rst for more de The kernel does not depend on external cpio tools. If you specify a directory instead of a configuration file, the kernel's build infrastructure creates a configuration file from that directory (usr/Makefile calls -usr/gen_initramfs_list.sh), and proceeds to package up that directory +usr/gen_initramfs.sh), and proceeds to package up that directory using the config file (by feeding it to usr/gen_init_cpio, which is created from usr/gen_init_cpio.c). The kernel's build-time cpio creation code is entirely self-contained, and the kernel's boot-time extractor is also From a17ad0961706244dce48ec941f7e476a38c0e727 Mon Sep 17 00:00:00 2001 From: Pravin B Shelar Date: Thu, 15 Jul 2021 16:59:00 -0700 Subject: [PATCH 512/714] net: Fix zero-copy head len calculation. In some cases skb head could be locked and entire header data is pulled from skb. When skb_zerocopy() called in such cases, following BUG is triggered. This patch fixes it by copying entire skb in such cases. This could be optimized incase this is performance bottleneck. ---8<--- kernel BUG at net/core/skbuff.c:2961! invalid opcode: 0000 [#1] SMP PTI CPU: 2 PID: 0 Comm: swapper/2 Tainted: G OE 5.4.0-77-generic #86-Ubuntu Hardware name: OpenStack Foundation OpenStack Nova, BIOS 1.13.0-1ubuntu1.1 04/01/2014 RIP: 0010:skb_zerocopy+0x37a/0x3a0 RSP: 0018:ffffbcc70013ca38 EFLAGS: 00010246 Call Trace: queue_userspace_packet+0x2af/0x5e0 [openvswitch] ovs_dp_upcall+0x3d/0x60 [openvswitch] ovs_dp_process_packet+0x125/0x150 [openvswitch] ovs_vport_receive+0x77/0xd0 [openvswitch] netdev_port_receive+0x87/0x130 [openvswitch] netdev_frame_hook+0x4b/0x60 [openvswitch] __netif_receive_skb_core+0x2b4/0xc90 __netif_receive_skb_one_core+0x3f/0xa0 __netif_receive_skb+0x18/0x60 process_backlog+0xa9/0x160 net_rx_action+0x142/0x390 __do_softirq+0xe1/0x2d6 irq_exit+0xae/0xb0 do_IRQ+0x5a/0xf0 common_interrupt+0xf/0xf Code that triggered BUG: int skb_zerocopy(struct sk_buff *to, struct sk_buff *from, int len, int hlen) { int i, j = 0; int plen = 0; /* length of skb->head fragment */ int ret; struct page *page; unsigned int offset; BUG_ON(!from->head_frag && !hlen); Signed-off-by: Pravin B Shelar Signed-off-by: David S. Miller --- net/core/skbuff.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/net/core/skbuff.c b/net/core/skbuff.c index 0fe97d6607902a..fc7942c0dddc36 100644 --- a/net/core/skbuff.c +++ b/net/core/skbuff.c @@ -3022,8 +3022,11 @@ skb_zerocopy_headlen(const struct sk_buff *from) if (!from->head_frag || skb_headlen(from) < L1_CACHE_BYTES || - skb_shinfo(from)->nr_frags >= MAX_SKB_FRAGS) + skb_shinfo(from)->nr_frags >= MAX_SKB_FRAGS) { hlen = skb_headlen(from); + if (!hlen) + hlen = from->len; + } if (skb_has_frag_list(from)) hlen = from->len; From f5051bcece50140abd1a11a2d36dc3ec5484fc32 Mon Sep 17 00:00:00 2001 From: Pavel Skripkin Date: Sat, 17 Jul 2021 14:29:33 +0300 Subject: [PATCH 513/714] net: sched: fix memory leak in tcindex_partial_destroy_work Syzbot reported memory leak in tcindex_set_parms(). The problem was in non-freed perfect hash in tcindex_partial_destroy_work(). In tcindex_set_parms() new tcindex_data is allocated and some fields from old one are copied to new one, but not the perfect hash. Since tcindex_partial_destroy_work() is the destroy function for old tcindex_data, we need to free perfect hash to avoid memory leak. Reported-and-tested-by: syzbot+f0bbb2287b8993d4fa74@syzkaller.appspotmail.com Fixes: 331b72922c5f ("net: sched: RCU cls_tcindex") Signed-off-by: Pavel Skripkin Signed-off-by: David S. Miller --- net/sched/cls_tcindex.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/net/sched/cls_tcindex.c b/net/sched/cls_tcindex.c index 5b274534264c2d..e9a8a2c86bbdd0 100644 --- a/net/sched/cls_tcindex.c +++ b/net/sched/cls_tcindex.c @@ -278,6 +278,8 @@ static int tcindex_filter_result_init(struct tcindex_filter_result *r, TCA_TCINDEX_POLICE); } +static void tcindex_free_perfect_hash(struct tcindex_data *cp); + static void tcindex_partial_destroy_work(struct work_struct *work) { struct tcindex_data *p = container_of(to_rcu_work(work), @@ -285,7 +287,8 @@ static void tcindex_partial_destroy_work(struct work_struct *work) rwork); rtnl_lock(); - kfree(p->perfect); + if (p->perfect) + tcindex_free_perfect_hash(p); kfree(p); rtnl_unlock(); } From 2f3fdd8d4805015fa964807e1c7f3d88f31bd389 Mon Sep 17 00:00:00 2001 From: Xin Long Date: Sat, 17 Jul 2021 17:19:19 -0400 Subject: [PATCH 514/714] sctp: trim optlen when it's a huge value in sctp_setsockopt After commit ca84bd058dae ("sctp: copy the optval from user space in sctp_setsockopt"), it does memory allocation in sctp_setsockopt with the optlen, and it would fail the allocation and return error if the optlen from user space is a huge value. This breaks some sockopts, like SCTP_HMAC_IDENT, SCTP_RESET_STREAMS and SCTP_AUTH_KEY, as when processing these sockopts before, optlen would be trimmed to a biggest value it needs when optlen is a huge value, instead of failing the allocation and returning error. This patch is to fix the allocation failure when it's a huge optlen from user space by trimming it to the biggest size sctp sockopt may need when necessary, and this biggest size is from sctp_setsockopt_reset_streams() for SCTP_RESET_STREAMS, which is bigger than those for SCTP_HMAC_IDENT and SCTP_AUTH_KEY. Fixes: ca84bd058dae ("sctp: copy the optval from user space in sctp_setsockopt") Signed-off-by: Xin Long Signed-off-by: David S. Miller --- net/sctp/socket.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/net/sctp/socket.c b/net/sctp/socket.c index e64e01f61b117b..6b937bfd475159 100644 --- a/net/sctp/socket.c +++ b/net/sctp/socket.c @@ -4577,6 +4577,10 @@ static int sctp_setsockopt(struct sock *sk, int level, int optname, } if (optlen > 0) { + /* Trim it to the biggest size sctp sockopt may need if necessary */ + optlen = min_t(unsigned int, optlen, + PAGE_ALIGN(USHRT_MAX + + sizeof(__u16) * sizeof(struct sctp_reset_streams))); kopt = memdup_sockptr(optval, optlen); if (IS_ERR(kopt)) return PTR_ERR(kopt); From 517a16b1a88bdb6b530f48d5d153478b2552d9a8 Mon Sep 17 00:00:00 2001 From: Nguyen Dinh Phi Date: Sun, 18 Jul 2021 22:40:13 +0800 Subject: [PATCH 515/714] netrom: Decrease sock refcount when sock timers expire Commit 63346650c1a9 ("netrom: switch to sock timer API") switched to use sock timer API. It replaces mod_timer() by sk_reset_timer(), and del_timer() by sk_stop_timer(). Function sk_reset_timer() will increase the refcount of sock if it is called on an inactive timer, hence, in case the timer expires, we need to decrease the refcount ourselves in the handler, otherwise, the sock refcount will be unbalanced and the sock will never be freed. Signed-off-by: Nguyen Dinh Phi Reported-by: syzbot+10f1194569953b72f1ae@syzkaller.appspotmail.com Fixes: 63346650c1a9 ("netrom: switch to sock timer API") Signed-off-by: David S. Miller --- net/netrom/nr_timer.c | 20 +++++++++++--------- 1 file changed, 11 insertions(+), 9 deletions(-) diff --git a/net/netrom/nr_timer.c b/net/netrom/nr_timer.c index 9115f8a7dd45b5..a8da88db7893fc 100644 --- a/net/netrom/nr_timer.c +++ b/net/netrom/nr_timer.c @@ -121,11 +121,9 @@ static void nr_heartbeat_expiry(struct timer_list *t) is accepted() it isn't 'dead' so doesn't get removed. */ if (sock_flag(sk, SOCK_DESTROY) || (sk->sk_state == TCP_LISTEN && sock_flag(sk, SOCK_DEAD))) { - sock_hold(sk); bh_unlock_sock(sk); nr_destroy_socket(sk); - sock_put(sk); - return; + goto out; } break; @@ -146,6 +144,8 @@ static void nr_heartbeat_expiry(struct timer_list *t) nr_start_heartbeat(sk); bh_unlock_sock(sk); +out: + sock_put(sk); } static void nr_t2timer_expiry(struct timer_list *t) @@ -159,6 +159,7 @@ static void nr_t2timer_expiry(struct timer_list *t) nr_enquiry_response(sk); } bh_unlock_sock(sk); + sock_put(sk); } static void nr_t4timer_expiry(struct timer_list *t) @@ -169,6 +170,7 @@ static void nr_t4timer_expiry(struct timer_list *t) bh_lock_sock(sk); nr_sk(sk)->condition &= ~NR_COND_PEER_RX_BUSY; bh_unlock_sock(sk); + sock_put(sk); } static void nr_idletimer_expiry(struct timer_list *t) @@ -197,6 +199,7 @@ static void nr_idletimer_expiry(struct timer_list *t) sock_set_flag(sk, SOCK_DEAD); } bh_unlock_sock(sk); + sock_put(sk); } static void nr_t1timer_expiry(struct timer_list *t) @@ -209,8 +212,7 @@ static void nr_t1timer_expiry(struct timer_list *t) case NR_STATE_1: if (nr->n2count == nr->n2) { nr_disconnect(sk, ETIMEDOUT); - bh_unlock_sock(sk); - return; + goto out; } else { nr->n2count++; nr_write_internal(sk, NR_CONNREQ); @@ -220,8 +222,7 @@ static void nr_t1timer_expiry(struct timer_list *t) case NR_STATE_2: if (nr->n2count == nr->n2) { nr_disconnect(sk, ETIMEDOUT); - bh_unlock_sock(sk); - return; + goto out; } else { nr->n2count++; nr_write_internal(sk, NR_DISCREQ); @@ -231,8 +232,7 @@ static void nr_t1timer_expiry(struct timer_list *t) case NR_STATE_3: if (nr->n2count == nr->n2) { nr_disconnect(sk, ETIMEDOUT); - bh_unlock_sock(sk); - return; + goto out; } else { nr->n2count++; nr_requeue_frames(sk); @@ -241,5 +241,7 @@ static void nr_t1timer_expiry(struct timer_list *t) } nr_start_t1timer(sk); +out: bh_unlock_sock(sk); + sock_put(sk); } From 2734d6c1b1a089fb593ef6a23d4b70903526fe0c Mon Sep 17 00:00:00 2001 From: Linus Torvalds Date: Sun, 18 Jul 2021 14:13:49 -0700 Subject: [PATCH 516/714] Linux 5.14-rc2 --- Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Makefile b/Makefile index 0517005d65fafd..e4f5895badb5eb 100644 --- a/Makefile +++ b/Makefile @@ -2,7 +2,7 @@ VERSION = 5 PATCHLEVEL = 14 SUBLEVEL = 0 -EXTRAVERSION = -rc1 +EXTRAVERSION = -rc2 NAME = Opossums on Parade # *DOCUMENTATION* From e746f3451ec7f91dcc9fd67a631239c715850a34 Mon Sep 17 00:00:00 2001 From: Mike Christie Date: Wed, 30 Jun 2021 19:25:59 -0500 Subject: [PATCH 517/714] scsi: iscsi: Fix iface sysfs attr detection A ISCSI_IFACE_PARAM can have the same value as a ISCSI_NET_PARAM so when iscsi_iface_attr_is_visible tries to figure out the type by just checking the value, we can collide and return the wrong type. When we call into the driver we might not match and return that we don't want attr visible in sysfs. The patch fixes this by setting the type when we figure out what the param is. Link: https://lore.kernel.org/r/20210701002559.89533-1-michael.christie@oracle.com Fixes: 3e0f65b34cc9 ("[SCSI] iscsi_transport: Additional parameters for network settings") Signed-off-by: Mike Christie Signed-off-by: Martin K. Petersen --- drivers/scsi/scsi_transport_iscsi.c | 90 +++++++++++------------------ 1 file changed, 34 insertions(+), 56 deletions(-) diff --git a/drivers/scsi/scsi_transport_iscsi.c b/drivers/scsi/scsi_transport_iscsi.c index b07105ae7c9172..d8b05d8b54708a 100644 --- a/drivers/scsi/scsi_transport_iscsi.c +++ b/drivers/scsi/scsi_transport_iscsi.c @@ -439,39 +439,10 @@ static umode_t iscsi_iface_attr_is_visible(struct kobject *kobj, struct device *dev = container_of(kobj, struct device, kobj); struct iscsi_iface *iface = iscsi_dev_to_iface(dev); struct iscsi_transport *t = iface->transport; - int param; - int param_type; + int param = -1; if (attr == &dev_attr_iface_enabled.attr) param = ISCSI_NET_PARAM_IFACE_ENABLE; - else if (attr == &dev_attr_iface_vlan_id.attr) - param = ISCSI_NET_PARAM_VLAN_ID; - else if (attr == &dev_attr_iface_vlan_priority.attr) - param = ISCSI_NET_PARAM_VLAN_PRIORITY; - else if (attr == &dev_attr_iface_vlan_enabled.attr) - param = ISCSI_NET_PARAM_VLAN_ENABLED; - else if (attr == &dev_attr_iface_mtu.attr) - param = ISCSI_NET_PARAM_MTU; - else if (attr == &dev_attr_iface_port.attr) - param = ISCSI_NET_PARAM_PORT; - else if (attr == &dev_attr_iface_ipaddress_state.attr) - param = ISCSI_NET_PARAM_IPADDR_STATE; - else if (attr == &dev_attr_iface_delayed_ack_en.attr) - param = ISCSI_NET_PARAM_DELAYED_ACK_EN; - else if (attr == &dev_attr_iface_tcp_nagle_disable.attr) - param = ISCSI_NET_PARAM_TCP_NAGLE_DISABLE; - else if (attr == &dev_attr_iface_tcp_wsf_disable.attr) - param = ISCSI_NET_PARAM_TCP_WSF_DISABLE; - else if (attr == &dev_attr_iface_tcp_wsf.attr) - param = ISCSI_NET_PARAM_TCP_WSF; - else if (attr == &dev_attr_iface_tcp_timer_scale.attr) - param = ISCSI_NET_PARAM_TCP_TIMER_SCALE; - else if (attr == &dev_attr_iface_tcp_timestamp_en.attr) - param = ISCSI_NET_PARAM_TCP_TIMESTAMP_EN; - else if (attr == &dev_attr_iface_cache_id.attr) - param = ISCSI_NET_PARAM_CACHE_ID; - else if (attr == &dev_attr_iface_redirect_en.attr) - param = ISCSI_NET_PARAM_REDIRECT_EN; else if (attr == &dev_attr_iface_def_taskmgmt_tmo.attr) param = ISCSI_IFACE_PARAM_DEF_TASKMGMT_TMO; else if (attr == &dev_attr_iface_header_digest.attr) @@ -508,6 +479,38 @@ static umode_t iscsi_iface_attr_is_visible(struct kobject *kobj, param = ISCSI_IFACE_PARAM_STRICT_LOGIN_COMP_EN; else if (attr == &dev_attr_iface_initiator_name.attr) param = ISCSI_IFACE_PARAM_INITIATOR_NAME; + + if (param != -1) + return t->attr_is_visible(ISCSI_IFACE_PARAM, param); + + if (attr == &dev_attr_iface_vlan_id.attr) + param = ISCSI_NET_PARAM_VLAN_ID; + else if (attr == &dev_attr_iface_vlan_priority.attr) + param = ISCSI_NET_PARAM_VLAN_PRIORITY; + else if (attr == &dev_attr_iface_vlan_enabled.attr) + param = ISCSI_NET_PARAM_VLAN_ENABLED; + else if (attr == &dev_attr_iface_mtu.attr) + param = ISCSI_NET_PARAM_MTU; + else if (attr == &dev_attr_iface_port.attr) + param = ISCSI_NET_PARAM_PORT; + else if (attr == &dev_attr_iface_ipaddress_state.attr) + param = ISCSI_NET_PARAM_IPADDR_STATE; + else if (attr == &dev_attr_iface_delayed_ack_en.attr) + param = ISCSI_NET_PARAM_DELAYED_ACK_EN; + else if (attr == &dev_attr_iface_tcp_nagle_disable.attr) + param = ISCSI_NET_PARAM_TCP_NAGLE_DISABLE; + else if (attr == &dev_attr_iface_tcp_wsf_disable.attr) + param = ISCSI_NET_PARAM_TCP_WSF_DISABLE; + else if (attr == &dev_attr_iface_tcp_wsf.attr) + param = ISCSI_NET_PARAM_TCP_WSF; + else if (attr == &dev_attr_iface_tcp_timer_scale.attr) + param = ISCSI_NET_PARAM_TCP_TIMER_SCALE; + else if (attr == &dev_attr_iface_tcp_timestamp_en.attr) + param = ISCSI_NET_PARAM_TCP_TIMESTAMP_EN; + else if (attr == &dev_attr_iface_cache_id.attr) + param = ISCSI_NET_PARAM_CACHE_ID; + else if (attr == &dev_attr_iface_redirect_en.attr) + param = ISCSI_NET_PARAM_REDIRECT_EN; else if (iface->iface_type == ISCSI_IFACE_TYPE_IPV4) { if (attr == &dev_attr_ipv4_iface_ipaddress.attr) param = ISCSI_NET_PARAM_IPV4_ADDR; @@ -598,32 +601,7 @@ static umode_t iscsi_iface_attr_is_visible(struct kobject *kobj, return 0; } - switch (param) { - case ISCSI_IFACE_PARAM_DEF_TASKMGMT_TMO: - case ISCSI_IFACE_PARAM_HDRDGST_EN: - case ISCSI_IFACE_PARAM_DATADGST_EN: - case ISCSI_IFACE_PARAM_IMM_DATA_EN: - case ISCSI_IFACE_PARAM_INITIAL_R2T_EN: - case ISCSI_IFACE_PARAM_DATASEQ_INORDER_EN: - case ISCSI_IFACE_PARAM_PDU_INORDER_EN: - case ISCSI_IFACE_PARAM_ERL: - case ISCSI_IFACE_PARAM_MAX_RECV_DLENGTH: - case ISCSI_IFACE_PARAM_FIRST_BURST: - case ISCSI_IFACE_PARAM_MAX_R2T: - case ISCSI_IFACE_PARAM_MAX_BURST: - case ISCSI_IFACE_PARAM_CHAP_AUTH_EN: - case ISCSI_IFACE_PARAM_BIDI_CHAP_EN: - case ISCSI_IFACE_PARAM_DISCOVERY_AUTH_OPTIONAL: - case ISCSI_IFACE_PARAM_DISCOVERY_LOGOUT_EN: - case ISCSI_IFACE_PARAM_STRICT_LOGIN_COMP_EN: - case ISCSI_IFACE_PARAM_INITIATOR_NAME: - param_type = ISCSI_IFACE_PARAM; - break; - default: - param_type = ISCSI_NET_PARAM; - } - - return t->attr_is_visible(param_type, param); + return t->attr_is_visible(ISCSI_NET_PARAM, param); } static struct attribute *iscsi_iface_attrs[] = { From 6d8e7e7c932162bccd06872362751b0e1d76f5af Mon Sep 17 00:00:00 2001 From: Dmitry Bogdanov Date: Fri, 2 Jul 2021 12:16:55 +0300 Subject: [PATCH 518/714] scsi: target: Fix protect handling in WRITE SAME(32) WRITE SAME(32) command handling reads WRPROTECT at the wrong offset in 1st byte instead of 10th byte. Link: https://lore.kernel.org/r/20210702091655.22818-1-d.bogdanov@yadro.com Fixes: afd73f1b60fc ("target: Perform PROTECT sanity checks for WRITE_SAME") Signed-off-by: Dmitry Bogdanov Signed-off-by: Martin K. Petersen --- drivers/target/target_core_sbc.c | 35 ++++++++++++++++---------------- 1 file changed, 17 insertions(+), 18 deletions(-) diff --git a/drivers/target/target_core_sbc.c b/drivers/target/target_core_sbc.c index b32f4ee88e79fd..ca1b2312d6e7b2 100644 --- a/drivers/target/target_core_sbc.c +++ b/drivers/target/target_core_sbc.c @@ -25,7 +25,7 @@ #include "target_core_alua.h" static sense_reason_t -sbc_check_prot(struct se_device *, struct se_cmd *, unsigned char *, u32, bool); +sbc_check_prot(struct se_device *, struct se_cmd *, unsigned char, u32, bool); static sense_reason_t sbc_execute_unmap(struct se_cmd *cmd); static sense_reason_t @@ -279,14 +279,14 @@ static inline unsigned long long transport_lba_64_ext(unsigned char *cdb) } static sense_reason_t -sbc_setup_write_same(struct se_cmd *cmd, unsigned char *flags, struct sbc_ops *ops) +sbc_setup_write_same(struct se_cmd *cmd, unsigned char flags, struct sbc_ops *ops) { struct se_device *dev = cmd->se_dev; sector_t end_lba = dev->transport->get_blocks(dev) + 1; unsigned int sectors = sbc_get_write_same_sectors(cmd); sense_reason_t ret; - if ((flags[0] & 0x04) || (flags[0] & 0x02)) { + if ((flags & 0x04) || (flags & 0x02)) { pr_err("WRITE_SAME PBDATA and LBDATA" " bits not supported for Block Discard" " Emulation\n"); @@ -308,7 +308,7 @@ sbc_setup_write_same(struct se_cmd *cmd, unsigned char *flags, struct sbc_ops *o } /* We always have ANC_SUP == 0 so setting ANCHOR is always an error */ - if (flags[0] & 0x10) { + if (flags & 0x10) { pr_warn("WRITE SAME with ANCHOR not supported\n"); return TCM_INVALID_CDB_FIELD; } @@ -316,7 +316,7 @@ sbc_setup_write_same(struct se_cmd *cmd, unsigned char *flags, struct sbc_ops *o * Special case for WRITE_SAME w/ UNMAP=1 that ends up getting * translated into block discard requests within backend code. */ - if (flags[0] & 0x08) { + if (flags & 0x08) { if (!ops->execute_unmap) return TCM_UNSUPPORTED_SCSI_OPCODE; @@ -331,7 +331,7 @@ sbc_setup_write_same(struct se_cmd *cmd, unsigned char *flags, struct sbc_ops *o if (!ops->execute_write_same) return TCM_UNSUPPORTED_SCSI_OPCODE; - ret = sbc_check_prot(dev, cmd, &cmd->t_task_cdb[0], sectors, true); + ret = sbc_check_prot(dev, cmd, flags >> 5, sectors, true); if (ret) return ret; @@ -717,10 +717,9 @@ sbc_set_prot_op_checks(u8 protect, bool fabric_prot, enum target_prot_type prot_ } static sense_reason_t -sbc_check_prot(struct se_device *dev, struct se_cmd *cmd, unsigned char *cdb, +sbc_check_prot(struct se_device *dev, struct se_cmd *cmd, unsigned char protect, u32 sectors, bool is_write) { - u8 protect = cdb[1] >> 5; int sp_ops = cmd->se_sess->sup_prot_ops; int pi_prot_type = dev->dev_attrib.pi_prot_type; bool fabric_prot = false; @@ -768,7 +767,7 @@ sbc_check_prot(struct se_device *dev, struct se_cmd *cmd, unsigned char *cdb, fallthrough; default: pr_err("Unable to determine pi_prot_type for CDB: 0x%02x " - "PROTECT: 0x%02x\n", cdb[0], protect); + "PROTECT: 0x%02x\n", cmd->t_task_cdb[0], protect); return TCM_INVALID_CDB_FIELD; } @@ -843,7 +842,7 @@ sbc_parse_cdb(struct se_cmd *cmd, struct sbc_ops *ops) if (sbc_check_dpofua(dev, cmd, cdb)) return TCM_INVALID_CDB_FIELD; - ret = sbc_check_prot(dev, cmd, cdb, sectors, false); + ret = sbc_check_prot(dev, cmd, cdb[1] >> 5, sectors, false); if (ret) return ret; @@ -857,7 +856,7 @@ sbc_parse_cdb(struct se_cmd *cmd, struct sbc_ops *ops) if (sbc_check_dpofua(dev, cmd, cdb)) return TCM_INVALID_CDB_FIELD; - ret = sbc_check_prot(dev, cmd, cdb, sectors, false); + ret = sbc_check_prot(dev, cmd, cdb[1] >> 5, sectors, false); if (ret) return ret; @@ -871,7 +870,7 @@ sbc_parse_cdb(struct se_cmd *cmd, struct sbc_ops *ops) if (sbc_check_dpofua(dev, cmd, cdb)) return TCM_INVALID_CDB_FIELD; - ret = sbc_check_prot(dev, cmd, cdb, sectors, false); + ret = sbc_check_prot(dev, cmd, cdb[1] >> 5, sectors, false); if (ret) return ret; @@ -892,7 +891,7 @@ sbc_parse_cdb(struct se_cmd *cmd, struct sbc_ops *ops) if (sbc_check_dpofua(dev, cmd, cdb)) return TCM_INVALID_CDB_FIELD; - ret = sbc_check_prot(dev, cmd, cdb, sectors, true); + ret = sbc_check_prot(dev, cmd, cdb[1] >> 5, sectors, true); if (ret) return ret; @@ -906,7 +905,7 @@ sbc_parse_cdb(struct se_cmd *cmd, struct sbc_ops *ops) if (sbc_check_dpofua(dev, cmd, cdb)) return TCM_INVALID_CDB_FIELD; - ret = sbc_check_prot(dev, cmd, cdb, sectors, true); + ret = sbc_check_prot(dev, cmd, cdb[1] >> 5, sectors, true); if (ret) return ret; @@ -921,7 +920,7 @@ sbc_parse_cdb(struct se_cmd *cmd, struct sbc_ops *ops) if (sbc_check_dpofua(dev, cmd, cdb)) return TCM_INVALID_CDB_FIELD; - ret = sbc_check_prot(dev, cmd, cdb, sectors, true); + ret = sbc_check_prot(dev, cmd, cdb[1] >> 5, sectors, true); if (ret) return ret; @@ -980,7 +979,7 @@ sbc_parse_cdb(struct se_cmd *cmd, struct sbc_ops *ops) size = sbc_get_size(cmd, 1); cmd->t_task_lba = get_unaligned_be64(&cdb[12]); - ret = sbc_setup_write_same(cmd, &cdb[10], ops); + ret = sbc_setup_write_same(cmd, cdb[10], ops); if (ret) return ret; break; @@ -1079,7 +1078,7 @@ sbc_parse_cdb(struct se_cmd *cmd, struct sbc_ops *ops) size = sbc_get_size(cmd, 1); cmd->t_task_lba = get_unaligned_be64(&cdb[2]); - ret = sbc_setup_write_same(cmd, &cdb[1], ops); + ret = sbc_setup_write_same(cmd, cdb[1], ops); if (ret) return ret; break; @@ -1097,7 +1096,7 @@ sbc_parse_cdb(struct se_cmd *cmd, struct sbc_ops *ops) * Follow sbcr26 with WRITE_SAME (10) and check for the existence * of byte 1 bit 3 UNMAP instead of original reserved field */ - ret = sbc_setup_write_same(cmd, &cdb[1], ops); + ret = sbc_setup_write_same(cmd, cdb[1], ops); if (ret) return ret; break; From a3a9ee4b5254f212c2adaa8cd8ca03bfa112f49d Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Christian=20K=C3=B6nig?= Date: Wed, 9 Jun 2021 19:25:56 +0200 Subject: [PATCH 519/714] drm/nouveau: init the base GEM fields for internal BOs MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit TTMs buffer objects are based on GEM objects for quite a while and rely on initializing those fields before initializing the TTM BO. Nouveau now doesn't init the GEM object for internally allocated BOs, so make sure that we at least initialize some necessary fields. Signed-off-by: Christian König Tested-by: Mikko Perttunen Reviewed-by: Matthew Auld Reviewed-by: Huang Rui Link: https://patchwork.freedesktop.org/patch/msgid/20210609172902.1937-1-christian.koenig@amd.com --- drivers/gpu/drm/nouveau/nouveau_bo.c | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/drivers/gpu/drm/nouveau/nouveau_bo.c b/drivers/gpu/drm/nouveau/nouveau_bo.c index 4f3a5357dd560c..6d07e653f82d5b 100644 --- a/drivers/gpu/drm/nouveau/nouveau_bo.c +++ b/drivers/gpu/drm/nouveau/nouveau_bo.c @@ -149,6 +149,8 @@ nouveau_bo_del_ttm(struct ttm_buffer_object *bo) */ if (bo->base.dev) drm_gem_object_release(&bo->base); + else + dma_resv_fini(&bo->base._resv); kfree(nvbo); } @@ -330,6 +332,10 @@ nouveau_bo_new(struct nouveau_cli *cli, u64 size, int align, if (IS_ERR(nvbo)) return PTR_ERR(nvbo); + nvbo->bo.base.size = size; + dma_resv_init(&nvbo->bo.base._resv); + drm_vma_node_reset(&nvbo->bo.base.vma_node); + ret = nouveau_bo_init(nvbo, size, align, domain, sg, robj); if (ret) return ret; From e4efa82660e6d80338c554e45e903714e1b2c27b Mon Sep 17 00:00:00 2001 From: Hui Wang Date: Mon, 19 Jul 2021 11:02:31 +0800 Subject: [PATCH 520/714] ALSA: hda/realtek: Fix pop noise and 2 Front Mic issues on a machine This is a Lenovo ThinkStation machine which uses the codec alc623. There are 2 issues on this machine, the 1st one is the pop noise in the lineout, the 2nd one is there are 2 Front Mics and pulseaudio can't handle them, After applying the fixup of ALC623_FIXUP_LENOVO_THINKSTATION_P340 to this machine, the 2 issues are fixed. Cc: Signed-off-by: Hui Wang Link: https://lore.kernel.org/r/20210719030231.6870-1-hui.wang@canonical.com Signed-off-by: Takashi Iwai --- sound/pci/hda/patch_realtek.c | 1 + 1 file changed, 1 insertion(+) diff --git a/sound/pci/hda/patch_realtek.c b/sound/pci/hda/patch_realtek.c index 1389cfd5e0dbb6..caaf0e8aac111f 100644 --- a/sound/pci/hda/patch_realtek.c +++ b/sound/pci/hda/patch_realtek.c @@ -8626,6 +8626,7 @@ static const struct snd_pci_quirk alc269_fixup_tbl[] = { SND_PCI_QUIRK(0x17aa, 0x3151, "ThinkCentre Station", ALC283_FIXUP_HEADSET_MIC), SND_PCI_QUIRK(0x17aa, 0x3176, "ThinkCentre Station", ALC283_FIXUP_HEADSET_MIC), SND_PCI_QUIRK(0x17aa, 0x3178, "ThinkCentre Station", ALC283_FIXUP_HEADSET_MIC), + SND_PCI_QUIRK(0x17aa, 0x31af, "ThinkCentre Station", ALC623_FIXUP_LENOVO_THINKSTATION_P340), SND_PCI_QUIRK(0x17aa, 0x3818, "Lenovo C940", ALC298_FIXUP_LENOVO_SPK_VOLUME), SND_PCI_QUIRK(0x17aa, 0x3827, "Ideapad S740", ALC285_FIXUP_IDEAPAD_S740_COEF), SND_PCI_QUIRK(0x17aa, 0x3843, "Yoga 9i", ALC287_FIXUP_IDEAPAD_BASS_SPK_AMP), From 7c9ff3deeee61b253715dcf968a6307af148c9b2 Mon Sep 17 00:00:00 2001 From: Haiyang Zhang Date: Fri, 16 Jul 2021 11:21:13 -0700 Subject: [PATCH 521/714] Drivers: hv: vmbus: Fix duplicate CPU assignments within a device The vmbus module uses a rotational algorithm to assign target CPUs to a device's channels. Depending on the timing of different device's channel offers, different channels of a device may be assigned to the same CPU. For example on a VM with 2 CPUs, if NIC A and B's channels are offered in the following order, NIC A will have both channels on CPU0, and NIC B will have both channels on CPU1 -- see below. This kind of assignment causes RSS load that is spreading across different channels to end up on the same CPU. Timing of channel offers: NIC A channel 0 NIC B channel 0 NIC A channel 1 NIC B channel 1 VMBUS ID 14: Class_ID = {f8615163-df3e-46c5-913f-f2d2f965ed0e} - Synthetic network adapter Device_ID = {cab064cd-1f31-47d5-a8b4-9d57e320cccd} Sysfs path: /sys/bus/vmbus/devices/cab064cd-1f31-47d5-a8b4-9d57e320cccd Rel_ID=14, target_cpu=0 Rel_ID=17, target_cpu=0 VMBUS ID 16: Class_ID = {f8615163-df3e-46c5-913f-f2d2f965ed0e} - Synthetic network adapter Device_ID = {244225ca-743e-4020-a17d-d7baa13d6cea} Sysfs path: /sys/bus/vmbus/devices/244225ca-743e-4020-a17d-d7baa13d6cea Rel_ID=16, target_cpu=1 Rel_ID=18, target_cpu=1 Update the vmbus CPU assignment algorithm to avoid duplicate CPU assignments within a device. The new algorithm iterates num_online_cpus + 1 times. The existing rotational algorithm to find "next NUMA & CPU" is still here. But if the resulting CPU is already used by the same device, it will try the next CPU. In the last iteration, it assigns the channel to the next available CPU like the existing algorithm. This is not normally expected, because during device probe, we limit the number of channels of a device to be <= number of online CPUs. Signed-off-by: Haiyang Zhang Reviewed-by: Michael Kelley Tested-by: Michael Kelley Link: https://lore.kernel.org/r/1626459673-17420-1-git-send-email-haiyangz@microsoft.com Signed-off-by: Wei Liu --- drivers/hv/channel_mgmt.c | 96 ++++++++++++++++++++++++++------------- 1 file changed, 64 insertions(+), 32 deletions(-) diff --git a/drivers/hv/channel_mgmt.c b/drivers/hv/channel_mgmt.c index caf6d0c4bc1b1d..142308526ec6ae 100644 --- a/drivers/hv/channel_mgmt.c +++ b/drivers/hv/channel_mgmt.c @@ -605,6 +605,17 @@ static void vmbus_process_offer(struct vmbus_channel *newchannel) */ mutex_lock(&vmbus_connection.channel_mutex); + list_for_each_entry(channel, &vmbus_connection.chn_list, listentry) { + if (guid_equal(&channel->offermsg.offer.if_type, + &newchannel->offermsg.offer.if_type) && + guid_equal(&channel->offermsg.offer.if_instance, + &newchannel->offermsg.offer.if_instance)) { + fnew = false; + newchannel->primary_channel = channel; + break; + } + } + init_vp_index(newchannel); /* Remember the channels that should be cleaned up upon suspend. */ @@ -617,16 +628,6 @@ static void vmbus_process_offer(struct vmbus_channel *newchannel) */ atomic_dec(&vmbus_connection.offer_in_progress); - list_for_each_entry(channel, &vmbus_connection.chn_list, listentry) { - if (guid_equal(&channel->offermsg.offer.if_type, - &newchannel->offermsg.offer.if_type) && - guid_equal(&channel->offermsg.offer.if_instance, - &newchannel->offermsg.offer.if_instance)) { - fnew = false; - break; - } - } - if (fnew) { list_add_tail(&newchannel->listentry, &vmbus_connection.chn_list); @@ -647,7 +648,6 @@ static void vmbus_process_offer(struct vmbus_channel *newchannel) /* * Process the sub-channel. */ - newchannel->primary_channel = channel; list_add_tail(&newchannel->sc_list, &channel->sc_list); } @@ -683,6 +683,30 @@ static void vmbus_process_offer(struct vmbus_channel *newchannel) queue_work(wq, &newchannel->add_channel_work); } +/* + * Check if CPUs used by other channels of the same device. + * It should only be called by init_vp_index(). + */ +static bool hv_cpuself_used(u32 cpu, struct vmbus_channel *chn) +{ + struct vmbus_channel *primary = chn->primary_channel; + struct vmbus_channel *sc; + + lockdep_assert_held(&vmbus_connection.channel_mutex); + + if (!primary) + return false; + + if (primary->target_cpu == cpu) + return true; + + list_for_each_entry(sc, &primary->sc_list, sc_list) + if (sc != chn && sc->target_cpu == cpu) + return true; + + return false; +} + /* * We use this state to statically distribute the channel interrupt load. */ @@ -702,6 +726,7 @@ static int next_numa_node_id; static void init_vp_index(struct vmbus_channel *channel) { bool perf_chn = hv_is_perf_channel(channel); + u32 i, ncpu = num_online_cpus(); cpumask_var_t available_mask; struct cpumask *alloced_mask; u32 target_cpu; @@ -724,31 +749,38 @@ static void init_vp_index(struct vmbus_channel *channel) return; } - while (true) { - numa_node = next_numa_node_id++; - if (numa_node == nr_node_ids) { - next_numa_node_id = 0; - continue; + for (i = 1; i <= ncpu + 1; i++) { + while (true) { + numa_node = next_numa_node_id++; + if (numa_node == nr_node_ids) { + next_numa_node_id = 0; + continue; + } + if (cpumask_empty(cpumask_of_node(numa_node))) + continue; + break; + } + alloced_mask = &hv_context.hv_numa_map[numa_node]; + + if (cpumask_weight(alloced_mask) == + cpumask_weight(cpumask_of_node(numa_node))) { + /* + * We have cycled through all the CPUs in the node; + * reset the alloced map. + */ + cpumask_clear(alloced_mask); } - if (cpumask_empty(cpumask_of_node(numa_node))) - continue; - break; - } - alloced_mask = &hv_context.hv_numa_map[numa_node]; - if (cpumask_weight(alloced_mask) == - cpumask_weight(cpumask_of_node(numa_node))) { - /* - * We have cycled through all the CPUs in the node; - * reset the alloced map. - */ - cpumask_clear(alloced_mask); - } + cpumask_xor(available_mask, alloced_mask, + cpumask_of_node(numa_node)); - cpumask_xor(available_mask, alloced_mask, cpumask_of_node(numa_node)); + target_cpu = cpumask_first(available_mask); + cpumask_set_cpu(target_cpu, alloced_mask); - target_cpu = cpumask_first(available_mask); - cpumask_set_cpu(target_cpu, alloced_mask); + if (channel->offermsg.offer.sub_channel_index >= ncpu || + i > ncpu || !hv_cpuself_used(target_cpu, channel)) + break; + } channel->target_cpu = target_cpu; From 21ed49265986931b8921a2404394426870245bd2 Mon Sep 17 00:00:00 2001 From: Geert Uytterhoeven Date: Mon, 12 Jul 2021 09:40:22 +0200 Subject: [PATCH 522/714] m68k: MAC should select HAVE_PATA_PLATFORM The defconfigs switched Mac from the deprecated CONFIG_BLK_DEV_PLATFORM to CONFIG_PATA_PLATFORM. However, the latter depends on CONFIG_HAVE_PATA_PLATFORM, which thus must be selected first. Fixes: b90257bfddbd01f3 ("m68k: use libata instead of the legacy ide driver") Signed-off-by: Geert Uytterhoeven Reviewed-by: Christoph Hellwig Link: https://lore.kernel.org/r/20210712074022.2116655-1-geert@linux-m68k.org --- arch/m68k/Kconfig.machine | 1 + 1 file changed, 1 insertion(+) diff --git a/arch/m68k/Kconfig.machine b/arch/m68k/Kconfig.machine index d964c1f2739952..6a07a681788563 100644 --- a/arch/m68k/Kconfig.machine +++ b/arch/m68k/Kconfig.machine @@ -33,6 +33,7 @@ config MAC depends on MMU select MMU_MOTOROLA if MMU select HAVE_ARCH_NVRAM_OPS + select HAVE_PATA_PLATFORM select LEGACY_TIMER_TICK help This option enables support for the Apple Macintosh series of From 78d2a05ef22e7b5863b01e073dd6a06b3979bb00 Mon Sep 17 00:00:00 2001 From: Peter Ujfalusi Date: Sat, 17 Jul 2021 15:28:18 +0300 Subject: [PATCH 523/714] ASoC: ti: j721e-evm: Fix unbalanced domain activity tracking during startup In case of an error within j721e_audio_startup() the domain->active must be decremented to avoid unbalanced counter. Signed-off-by: Peter Ujfalusi Link: https://lore.kernel.org/r/20210717122820.1467-2-peter.ujfalusi@gmail.com Signed-off-by: Mark Brown --- sound/soc/ti/j721e-evm.c | 16 +++++++++++----- 1 file changed, 11 insertions(+), 5 deletions(-) diff --git a/sound/soc/ti/j721e-evm.c b/sound/soc/ti/j721e-evm.c index a7c0484d44ec79..017c4ad11ca6e1 100644 --- a/sound/soc/ti/j721e-evm.c +++ b/sound/soc/ti/j721e-evm.c @@ -278,23 +278,29 @@ static int j721e_audio_startup(struct snd_pcm_substream *substream) j721e_rule_rate, &priv->rate_range, SNDRV_PCM_HW_PARAM_RATE, -1); - mutex_unlock(&priv->mutex); if (ret) - return ret; + goto out; /* Reset TDM slots to 32 */ ret = snd_soc_dai_set_tdm_slot(cpu_dai, 0x3, 0x3, 2, 32); if (ret && ret != -ENOTSUPP) - return ret; + goto out; for_each_rtd_codec_dais(rtd, i, codec_dai) { ret = snd_soc_dai_set_tdm_slot(codec_dai, 0x3, 0x3, 2, 32); if (ret && ret != -ENOTSUPP) - return ret; + goto out; } - return 0; + if (ret == -ENOTSUPP) + ret = 0; +out: + if (ret) + domain->active--; + mutex_unlock(&priv->mutex); + + return ret; } static int j721e_audio_hw_params(struct snd_pcm_substream *substream, From 82d28b67f780910f816fe1cfb0f676fc38c4cbb3 Mon Sep 17 00:00:00 2001 From: Peter Ujfalusi Date: Sat, 17 Jul 2021 15:28:19 +0300 Subject: [PATCH 524/714] ASoC: ti: j721e-evm: Check for not initialized parent_clk_id During probe the parent_clk_id is set to -1 which should not be used to array index within hsdiv_rates[]. Signed-off-by: Peter Ujfalusi Link: https://lore.kernel.org/r/20210717122820.1467-3-peter.ujfalusi@gmail.com Signed-off-by: Mark Brown --- sound/soc/ti/j721e-evm.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sound/soc/ti/j721e-evm.c b/sound/soc/ti/j721e-evm.c index 017c4ad11ca6e1..265bbc5a2f96a8 100644 --- a/sound/soc/ti/j721e-evm.c +++ b/sound/soc/ti/j721e-evm.c @@ -197,7 +197,7 @@ static int j721e_configure_refclk(struct j721e_priv *priv, return ret; } - if (priv->hsdiv_rates[domain->parent_clk_id] != scki) { + if (domain->parent_clk_id == -1 || priv->hsdiv_rates[domain->parent_clk_id] != scki) { dev_dbg(priv->dev, "%s configuration for %u Hz: %s, %dxFS (SCKI: %u Hz)\n", audio_domain == J721E_AUDIO_DOMAIN_CPB ? "CPB" : "IVI", From 56912da7a68c8356df6a6740476237441b0b792a Mon Sep 17 00:00:00 2001 From: Marek Vasut Date: Fri, 16 Jul 2021 20:21:33 +0200 Subject: [PATCH 525/714] spi: cadence: Correct initialisation of runtime PM again The original implementation of RPM handling in probe() was mostly correct, except it failed to call pm_runtime_get_*() to activate the hardware. The subsequent fix, 734882a8bf98 ("spi: cadence: Correct initialisation of runtime PM"), breaks the implementation further, to the point where the system using this hard IP on ZynqMP hangs on boot, because it accesses hardware which is gated off. Undo 734882a8bf98 ("spi: cadence: Correct initialisation of runtime PM") and instead add missing pm_runtime_get_noresume() and move the RPM disabling all the way to the end of probe(). That makes ZynqMP not hang on boot yet again. Fixes: 734882a8bf98 ("spi: cadence: Correct initialisation of runtime PM") Signed-off-by: Marek Vasut Cc: Charles Keepax Cc: Mark Brown Link: https://lore.kernel.org/r/20210716182133.218640-1-marex@denx.de Signed-off-by: Mark Brown --- drivers/spi/spi-cadence.c | 14 +++++++++----- 1 file changed, 9 insertions(+), 5 deletions(-) diff --git a/drivers/spi/spi-cadence.c b/drivers/spi/spi-cadence.c index a3afd1b9ac567b..ceb16e70d235af 100644 --- a/drivers/spi/spi-cadence.c +++ b/drivers/spi/spi-cadence.c @@ -517,6 +517,12 @@ static int cdns_spi_probe(struct platform_device *pdev) goto clk_dis_apb; } + pm_runtime_use_autosuspend(&pdev->dev); + pm_runtime_set_autosuspend_delay(&pdev->dev, SPI_AUTOSUSPEND_TIMEOUT); + pm_runtime_get_noresume(&pdev->dev); + pm_runtime_set_active(&pdev->dev); + pm_runtime_enable(&pdev->dev); + ret = of_property_read_u32(pdev->dev.of_node, "num-cs", &num_cs); if (ret < 0) master->num_chipselect = CDNS_SPI_DEFAULT_NUM_CS; @@ -531,11 +537,6 @@ static int cdns_spi_probe(struct platform_device *pdev) /* SPI controller initializations */ cdns_spi_init_hw(xspi); - pm_runtime_set_active(&pdev->dev); - pm_runtime_enable(&pdev->dev); - pm_runtime_use_autosuspend(&pdev->dev); - pm_runtime_set_autosuspend_delay(&pdev->dev, SPI_AUTOSUSPEND_TIMEOUT); - irq = platform_get_irq(pdev, 0); if (irq <= 0) { ret = -ENXIO; @@ -566,6 +567,9 @@ static int cdns_spi_probe(struct platform_device *pdev) master->bits_per_word_mask = SPI_BPW_MASK(8); + pm_runtime_mark_last_busy(&pdev->dev); + pm_runtime_put_autosuspend(&pdev->dev); + ret = spi_register_master(master); if (ret) { dev_err(&pdev->dev, "spi_register_master failed\n"); From c9d9fdbc108af8915d3f497bbdf3898bf8f321b8 Mon Sep 17 00:00:00 2001 From: Jason Ekstrand Date: Wed, 14 Jul 2021 14:34:15 -0500 Subject: [PATCH 526/714] drm/i915: Revert "drm/i915/gem: Asynchronous cmdparser" This reverts 686c7c35abc2 ("drm/i915/gem: Asynchronous cmdparser"). The justification for this commit in the git history was a vague comment about getting it out from under the struct_mutex. While this may improve perf for some workloads on Gen7 platforms where we rely on the command parser for features such as indirect rendering, no numbers were provided to prove such an improvement. It claims to closed two gitlab/bugzilla issues but with no explanation whatsoever as to why or what bug it's fixing. Meanwhile, by moving command parsing off to an async callback, it leaves us with a problem of what to do on error. When things were synchronous, EXECBUFFER2 would fail with an error code if parsing failed. When moving it to async, we needed another way to handle that error and the solution employed was to set an error on the dma_fence and then trust that said error gets propagated to the client eventually. Moving back to synchronous will help us untangle the fence error propagation mess. This also reverts most of 0edbb9ba1bfe ("drm/i915: Move cmd parser pinning to execbuffer") which is a refactor of some of our allocation paths for asynchronous parsing. Now that everything is synchronous, we don't need it. v2 (Daniel Vetter): - Add stabel Cc and Fixes tag Signed-off-by: Jason Ekstrand Cc: # v5.6+ Fixes: 9e31c1fe45d5 ("drm/i915: Propagate errors on awaiting already signaled fences") Cc: Maarten Lankhorst Reviewed-by: Jon Bloomfield Acked-by: Daniel Vetter Signed-off-by: Daniel Vetter Link: https://patchwork.freedesktop.org/patch/msgid/20210714193419.1459723-2-jason@jlekstrand.net (cherry picked from commit 93b713304188844b8514074dc13ffd56d12235d3) Signed-off-by: Rodrigo Vivi --- .../gpu/drm/i915/gem/i915_gem_execbuffer.c | 227 +----------------- .../i915/gem/selftests/i915_gem_execbuffer.c | 4 + drivers/gpu/drm/i915/i915_cmd_parser.c | 132 +++++----- drivers/gpu/drm/i915/i915_drv.h | 7 +- 4 files changed, 91 insertions(+), 279 deletions(-) diff --git a/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c b/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c index a8abc9af5ff47e..4a6419d7be93c5 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c +++ b/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c @@ -25,10 +25,8 @@ #include "i915_gem_clflush.h" #include "i915_gem_context.h" #include "i915_gem_ioctls.h" -#include "i915_sw_fence_work.h" #include "i915_trace.h" #include "i915_user_extensions.h" -#include "i915_memcpy.h" struct eb_vma { struct i915_vma *vma; @@ -1456,6 +1454,10 @@ static u32 *reloc_gpu(struct i915_execbuffer *eb, int err; struct intel_engine_cs *engine = eb->engine; + /* If we need to copy for the cmdparser, we will stall anyway */ + if (eb_use_cmdparser(eb)) + return ERR_PTR(-EWOULDBLOCK); + if (!reloc_can_use_engine(engine)) { engine = engine->gt->engine_class[COPY_ENGINE_CLASS][0]; if (!engine) @@ -2372,217 +2374,6 @@ shadow_batch_pin(struct i915_execbuffer *eb, return vma; } -struct eb_parse_work { - struct dma_fence_work base; - struct intel_engine_cs *engine; - struct i915_vma *batch; - struct i915_vma *shadow; - struct i915_vma *trampoline; - unsigned long batch_offset; - unsigned long batch_length; - unsigned long *jump_whitelist; - const void *batch_map; - void *shadow_map; -}; - -static int __eb_parse(struct dma_fence_work *work) -{ - struct eb_parse_work *pw = container_of(work, typeof(*pw), base); - int ret; - bool cookie; - - cookie = dma_fence_begin_signalling(); - ret = intel_engine_cmd_parser(pw->engine, - pw->batch, - pw->batch_offset, - pw->batch_length, - pw->shadow, - pw->jump_whitelist, - pw->shadow_map, - pw->batch_map); - dma_fence_end_signalling(cookie); - - return ret; -} - -static void __eb_parse_release(struct dma_fence_work *work) -{ - struct eb_parse_work *pw = container_of(work, typeof(*pw), base); - - if (!IS_ERR_OR_NULL(pw->jump_whitelist)) - kfree(pw->jump_whitelist); - - if (pw->batch_map) - i915_gem_object_unpin_map(pw->batch->obj); - else - i915_gem_object_unpin_pages(pw->batch->obj); - - i915_gem_object_unpin_map(pw->shadow->obj); - - if (pw->trampoline) - i915_active_release(&pw->trampoline->active); - i915_active_release(&pw->shadow->active); - i915_active_release(&pw->batch->active); -} - -static const struct dma_fence_work_ops eb_parse_ops = { - .name = "eb_parse", - .work = __eb_parse, - .release = __eb_parse_release, -}; - -static inline int -__parser_mark_active(struct i915_vma *vma, - struct intel_timeline *tl, - struct dma_fence *fence) -{ - struct intel_gt_buffer_pool_node *node = vma->private; - - return i915_active_ref(&node->active, tl->fence_context, fence); -} - -static int -parser_mark_active(struct eb_parse_work *pw, struct intel_timeline *tl) -{ - int err; - - mutex_lock(&tl->mutex); - - err = __parser_mark_active(pw->shadow, tl, &pw->base.dma); - if (err) - goto unlock; - - if (pw->trampoline) { - err = __parser_mark_active(pw->trampoline, tl, &pw->base.dma); - if (err) - goto unlock; - } - -unlock: - mutex_unlock(&tl->mutex); - return err; -} - -static int eb_parse_pipeline(struct i915_execbuffer *eb, - struct i915_vma *shadow, - struct i915_vma *trampoline) -{ - struct eb_parse_work *pw; - struct drm_i915_gem_object *batch = eb->batch->vma->obj; - bool needs_clflush; - int err; - - GEM_BUG_ON(overflows_type(eb->batch_start_offset, pw->batch_offset)); - GEM_BUG_ON(overflows_type(eb->batch_len, pw->batch_length)); - - pw = kzalloc(sizeof(*pw), GFP_KERNEL); - if (!pw) - return -ENOMEM; - - err = i915_active_acquire(&eb->batch->vma->active); - if (err) - goto err_free; - - err = i915_active_acquire(&shadow->active); - if (err) - goto err_batch; - - if (trampoline) { - err = i915_active_acquire(&trampoline->active); - if (err) - goto err_shadow; - } - - pw->shadow_map = i915_gem_object_pin_map(shadow->obj, I915_MAP_WB); - if (IS_ERR(pw->shadow_map)) { - err = PTR_ERR(pw->shadow_map); - goto err_trampoline; - } - - needs_clflush = - !(batch->cache_coherent & I915_BO_CACHE_COHERENT_FOR_READ); - - pw->batch_map = ERR_PTR(-ENODEV); - if (needs_clflush && i915_has_memcpy_from_wc()) - pw->batch_map = i915_gem_object_pin_map(batch, I915_MAP_WC); - - if (IS_ERR(pw->batch_map)) { - err = i915_gem_object_pin_pages(batch); - if (err) - goto err_unmap_shadow; - pw->batch_map = NULL; - } - - pw->jump_whitelist = - intel_engine_cmd_parser_alloc_jump_whitelist(eb->batch_len, - trampoline); - if (IS_ERR(pw->jump_whitelist)) { - err = PTR_ERR(pw->jump_whitelist); - goto err_unmap_batch; - } - - dma_fence_work_init(&pw->base, &eb_parse_ops); - - pw->engine = eb->engine; - pw->batch = eb->batch->vma; - pw->batch_offset = eb->batch_start_offset; - pw->batch_length = eb->batch_len; - pw->shadow = shadow; - pw->trampoline = trampoline; - - /* Mark active refs early for this worker, in case we get interrupted */ - err = parser_mark_active(pw, eb->context->timeline); - if (err) - goto err_commit; - - err = dma_resv_reserve_shared(pw->batch->resv, 1); - if (err) - goto err_commit; - - err = dma_resv_reserve_shared(shadow->resv, 1); - if (err) - goto err_commit; - - /* Wait for all writes (and relocs) into the batch to complete */ - err = i915_sw_fence_await_reservation(&pw->base.chain, - pw->batch->resv, NULL, false, - 0, I915_FENCE_GFP); - if (err < 0) - goto err_commit; - - /* Keep the batch alive and unwritten as we parse */ - dma_resv_add_shared_fence(pw->batch->resv, &pw->base.dma); - - /* Force execution to wait for completion of the parser */ - dma_resv_add_excl_fence(shadow->resv, &pw->base.dma); - - dma_fence_work_commit_imm(&pw->base); - return 0; - -err_commit: - i915_sw_fence_set_error_once(&pw->base.chain, err); - dma_fence_work_commit_imm(&pw->base); - return err; - -err_unmap_batch: - if (pw->batch_map) - i915_gem_object_unpin_map(batch); - else - i915_gem_object_unpin_pages(batch); -err_unmap_shadow: - i915_gem_object_unpin_map(shadow->obj); -err_trampoline: - if (trampoline) - i915_active_release(&trampoline->active); -err_shadow: - i915_active_release(&shadow->active); -err_batch: - i915_active_release(&eb->batch->vma->active); -err_free: - kfree(pw); - return err; -} - static struct i915_vma *eb_dispatch_secure(struct i915_execbuffer *eb, struct i915_vma *vma) { /* @@ -2672,7 +2463,15 @@ static int eb_parse(struct i915_execbuffer *eb) goto err_trampoline; } - err = eb_parse_pipeline(eb, shadow, trampoline); + err = dma_resv_reserve_shared(shadow->resv, 1); + if (err) + goto err_trampoline; + + err = intel_engine_cmd_parser(eb->engine, + eb->batch->vma, + eb->batch_start_offset, + eb->batch_len, + shadow, trampoline); if (err) goto err_unpin_batch; diff --git a/drivers/gpu/drm/i915/gem/selftests/i915_gem_execbuffer.c b/drivers/gpu/drm/i915/gem/selftests/i915_gem_execbuffer.c index 4df505e4c53ae9..16162fc2782dc2 100644 --- a/drivers/gpu/drm/i915/gem/selftests/i915_gem_execbuffer.c +++ b/drivers/gpu/drm/i915/gem/selftests/i915_gem_execbuffer.c @@ -125,6 +125,10 @@ static int igt_gpu_reloc(void *arg) intel_gt_pm_get(&eb.i915->gt); for_each_uabi_engine(eb.engine, eb.i915) { + if (intel_engine_requires_cmd_parser(eb.engine) || + intel_engine_using_cmd_parser(eb.engine)) + continue; + reloc_cache_init(&eb.reloc_cache, eb.i915); memset(map, POISON_INUSE, 4096); diff --git a/drivers/gpu/drm/i915/i915_cmd_parser.c b/drivers/gpu/drm/i915/i915_cmd_parser.c index 3992c25a191dae..00ec618d015900 100644 --- a/drivers/gpu/drm/i915/i915_cmd_parser.c +++ b/drivers/gpu/drm/i915/i915_cmd_parser.c @@ -1145,19 +1145,41 @@ find_reg(const struct intel_engine_cs *engine, u32 addr) static u32 *copy_batch(struct drm_i915_gem_object *dst_obj, struct drm_i915_gem_object *src_obj, unsigned long offset, unsigned long length, - void *dst, const void *src) + bool *needs_clflush_after) { - bool needs_clflush = - !(src_obj->cache_coherent & I915_BO_CACHE_COHERENT_FOR_READ); - - if (src) { - GEM_BUG_ON(!needs_clflush); - i915_unaligned_memcpy_from_wc(dst, src + offset, length); - } else { - struct scatterlist *sg; + unsigned int src_needs_clflush; + unsigned int dst_needs_clflush; + void *dst, *src; + int ret; + + ret = i915_gem_object_prepare_write(dst_obj, &dst_needs_clflush); + if (ret) + return ERR_PTR(ret); + + dst = i915_gem_object_pin_map(dst_obj, I915_MAP_WB); + i915_gem_object_finish_access(dst_obj); + if (IS_ERR(dst)) + return dst; + + ret = i915_gem_object_prepare_read(src_obj, &src_needs_clflush); + if (ret) { + i915_gem_object_unpin_map(dst_obj); + return ERR_PTR(ret); + } + + src = ERR_PTR(-ENODEV); + if (src_needs_clflush && i915_has_memcpy_from_wc()) { + src = i915_gem_object_pin_map(src_obj, I915_MAP_WC); + if (!IS_ERR(src)) { + i915_unaligned_memcpy_from_wc(dst, + src + offset, + length); + i915_gem_object_unpin_map(src_obj); + } + } + if (IS_ERR(src)) { + unsigned long x, n, remain; void *ptr; - unsigned int x, sg_ofs; - unsigned long remain; /* * We can avoid clflushing partial cachelines before the write @@ -1168,40 +1190,34 @@ static u32 *copy_batch(struct drm_i915_gem_object *dst_obj, * validate up to the end of the batch. */ remain = length; - if (!(dst_obj->cache_coherent & I915_BO_CACHE_COHERENT_FOR_READ)) + if (dst_needs_clflush & CLFLUSH_BEFORE) remain = round_up(remain, boot_cpu_data.x86_clflush_size); ptr = dst; x = offset_in_page(offset); - sg = i915_gem_object_get_sg(src_obj, offset >> PAGE_SHIFT, &sg_ofs, false); - - while (remain) { - unsigned long sg_max = sg->length >> PAGE_SHIFT; - - for (; remain && sg_ofs < sg_max; sg_ofs++) { - unsigned long len = min(remain, PAGE_SIZE - x); - void *map; - - map = kmap_atomic(nth_page(sg_page(sg), sg_ofs)); - if (needs_clflush) - drm_clflush_virt_range(map + x, len); - memcpy(ptr, map + x, len); - kunmap_atomic(map); - - ptr += len; - remain -= len; - x = 0; - } - - sg_ofs = 0; - sg = sg_next(sg); + for (n = offset >> PAGE_SHIFT; remain; n++) { + int len = min(remain, PAGE_SIZE - x); + + src = kmap_atomic(i915_gem_object_get_page(src_obj, n)); + if (src_needs_clflush) + drm_clflush_virt_range(src + x, len); + memcpy(ptr, src + x, len); + kunmap_atomic(src); + + ptr += len; + remain -= len; + x = 0; } } + i915_gem_object_finish_access(src_obj); + memset32(dst + length, 0, (dst_obj->base.size - length) / sizeof(u32)); /* dst_obj is returned with vmap pinned */ + *needs_clflush_after = dst_needs_clflush & CLFLUSH_AFTER; + return dst; } @@ -1360,6 +1376,9 @@ static int check_bbstart(u32 *cmd, u32 offset, u32 length, if (target_cmd_index == offset) return 0; + if (IS_ERR(jump_whitelist)) + return PTR_ERR(jump_whitelist); + if (!test_bit(target_cmd_index, jump_whitelist)) { DRM_DEBUG("CMD: BB_START to 0x%llx not a previously executed cmd\n", jump_target); @@ -1369,28 +1388,10 @@ static int check_bbstart(u32 *cmd, u32 offset, u32 length, return 0; } -/** - * intel_engine_cmd_parser_alloc_jump_whitelist() - preallocate jump whitelist for intel_engine_cmd_parser() - * @batch_length: length of the commands in batch_obj - * @trampoline: Whether jump trampolines are used. - * - * Preallocates a jump whitelist for parsing the cmd buffer in intel_engine_cmd_parser(). - * This has to be preallocated, because the command parser runs in signaling context, - * and may not allocate any memory. - * - * Return: NULL or pointer to a jump whitelist, or ERR_PTR() on failure. Use - * IS_ERR() to check for errors. Must bre freed() with kfree(). - * - * NULL is a valid value, meaning no allocation was required. - */ -unsigned long *intel_engine_cmd_parser_alloc_jump_whitelist(u32 batch_length, - bool trampoline) +static unsigned long *alloc_whitelist(u32 batch_length) { unsigned long *jmp; - if (trampoline) - return NULL; - /* * We expect batch_length to be less than 256KiB for known users, * i.e. we need at most an 8KiB bitmap allocation which should be @@ -1425,21 +1426,21 @@ unsigned long *intel_engine_cmd_parser_alloc_jump_whitelist(u32 batch_length, * Return: non-zero if the parser finds violations or otherwise fails; -EACCES * if the batch appears legal but should use hardware parsing */ + int intel_engine_cmd_parser(struct intel_engine_cs *engine, struct i915_vma *batch, unsigned long batch_offset, unsigned long batch_length, struct i915_vma *shadow, - unsigned long *jump_whitelist, - void *shadow_map, - const void *batch_map) + bool trampoline) { u32 *cmd, *batch_end, offset = 0; struct drm_i915_cmd_descriptor default_desc = noop_desc; const struct drm_i915_cmd_descriptor *desc = &default_desc; + bool needs_clflush_after = false; + unsigned long *jump_whitelist; u64 batch_addr, shadow_addr; int ret = 0; - bool trampoline = !jump_whitelist; GEM_BUG_ON(!IS_ALIGNED(batch_offset, sizeof(*cmd))); GEM_BUG_ON(!IS_ALIGNED(batch_length, sizeof(*cmd))); @@ -1447,8 +1448,18 @@ int intel_engine_cmd_parser(struct intel_engine_cs *engine, batch->size)); GEM_BUG_ON(!batch_length); - cmd = copy_batch(shadow->obj, batch->obj, batch_offset, batch_length, - shadow_map, batch_map); + cmd = copy_batch(shadow->obj, batch->obj, + batch_offset, batch_length, + &needs_clflush_after); + if (IS_ERR(cmd)) { + DRM_DEBUG("CMD: Failed to copy batch\n"); + return PTR_ERR(cmd); + } + + jump_whitelist = NULL; + if (!trampoline) + /* Defer failure until attempted use */ + jump_whitelist = alloc_whitelist(batch_length); shadow_addr = gen8_canonical_addr(shadow->node.start); batch_addr = gen8_canonical_addr(batch->node.start + batch_offset); @@ -1549,6 +1560,9 @@ int intel_engine_cmd_parser(struct intel_engine_cs *engine, i915_gem_object_flush_map(shadow->obj); + if (!IS_ERR_OR_NULL(jump_whitelist)) + kfree(jump_whitelist); + i915_gem_object_unpin_map(shadow->obj); return ret; } diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h index 38ff2fb897443e..b30397b0452904 100644 --- a/drivers/gpu/drm/i915/i915_drv.h +++ b/drivers/gpu/drm/i915/i915_drv.h @@ -1906,17 +1906,12 @@ const char *i915_cache_level_str(struct drm_i915_private *i915, int type); int i915_cmd_parser_get_version(struct drm_i915_private *dev_priv); int intel_engine_init_cmd_parser(struct intel_engine_cs *engine); void intel_engine_cleanup_cmd_parser(struct intel_engine_cs *engine); -unsigned long *intel_engine_cmd_parser_alloc_jump_whitelist(u32 batch_length, - bool trampoline); - int intel_engine_cmd_parser(struct intel_engine_cs *engine, struct i915_vma *batch, unsigned long batch_offset, unsigned long batch_length, struct i915_vma *shadow, - unsigned long *jump_whitelist, - void *shadow_map, - const void *batch_map); + bool trampoline); #define I915_CMD_PARSER_TRAMPOLINE_SIZE 8 /* intel_device_info.c */ From 3761baae908a7b5012be08d70fa553cc2eb82305 Mon Sep 17 00:00:00 2001 From: Jason Ekstrand Date: Wed, 14 Jul 2021 14:34:16 -0500 Subject: [PATCH 527/714] Revert "drm/i915: Propagate errors on awaiting already signaled fences" This reverts commit 9e31c1fe45d555a948ff66f1f0e3fe1f83ca63f7. Ever since that commit, we've been having issues where a hang in one client can propagate to another. In particular, a hang in an app can propagate to the X server which causes the whole desktop to lock up. Error propagation along fences sound like a good idea, but as your bug shows, surprising consequences, since propagating errors across security boundaries is not a good thing. What we do have is track the hangs on the ctx, and report information to userspace using RESET_STATS. That's how arb_robustness works. Also, if my understanding is still correct, the EIO from execbuf is when your context is banned (because not recoverable or too many hangs). And in all these cases it's up to userspace to figure out what is all impacted and should be reported to the application, that's not on the kernel to guess and automatically propagate. What's more, we're also building more features on top of ctx error reporting with RESET_STATS ioctl: Encrypted buffers use the same, and the userspace fence wait also relies on that mechanism. So it is the path going forward for reporting gpu hangs and resets to userspace. So all together that's why I think we should just bury this idea again as not quite the direction we want to go to, hence why I think the revert is the right option here. For backporters: Please note that you _must_ have a backport of https://lore.kernel.org/dri-devel/20210602164149.391653-2-jason@jlekstrand.net/ for otherwise backporting just this patch opens up a security bug. v2: Augment commit message. Also restore Jason's sob that I accidentally lost. v3: Add a note for backporters Signed-off-by: Jason Ekstrand Reported-by: Marcin Slusarz Cc: # v5.6+ Cc: Jason Ekstrand Cc: Marcin Slusarz Closes: https://gitlab.freedesktop.org/drm/intel/-/issues/3080 Fixes: 9e31c1fe45d5 ("drm/i915: Propagate errors on awaiting already signaled fences") Acked-by: Daniel Vetter Reviewed-by: Jon Bloomfield Signed-off-by: Daniel Vetter Link: https://patchwork.freedesktop.org/patch/msgid/20210714193419.1459723-3-jason@jlekstrand.net (cherry picked from commit 93a2711cddd5760e2f0f901817d71c93183c3b87) Signed-off-by: Rodrigo Vivi --- drivers/gpu/drm/i915/i915_request.c | 8 ++------ 1 file changed, 2 insertions(+), 6 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_request.c b/drivers/gpu/drm/i915/i915_request.c index 1014c71cf7f52a..37aef13085739e 100644 --- a/drivers/gpu/drm/i915/i915_request.c +++ b/drivers/gpu/drm/i915/i915_request.c @@ -1426,10 +1426,8 @@ i915_request_await_execution(struct i915_request *rq, do { fence = *child++; - if (test_bit(DMA_FENCE_FLAG_SIGNALED_BIT, &fence->flags)) { - i915_sw_fence_set_error_once(&rq->submit, fence->error); + if (test_bit(DMA_FENCE_FLAG_SIGNALED_BIT, &fence->flags)) continue; - } if (fence->context == rq->fence.context) continue; @@ -1527,10 +1525,8 @@ i915_request_await_dma_fence(struct i915_request *rq, struct dma_fence *fence) do { fence = *child++; - if (test_bit(DMA_FENCE_FLAG_SIGNALED_BIT, &fence->flags)) { - i915_sw_fence_set_error_once(&rq->submit, fence->error); + if (test_bit(DMA_FENCE_FLAG_SIGNALED_BIT, &fence->flags)) continue; - } /* * Requests on the same timeline are explicitly ordered, along From d2cbbf1fe503c07e466c62f83aa1926d74d15821 Mon Sep 17 00:00:00 2001 From: Robert Richter Date: Thu, 15 Jul 2021 11:26:01 +0200 Subject: [PATCH 528/714] ACPI: Kconfig: Fix table override from built-in initrd During a rework of initramfs code the INITRAMFS_COMPRESSION config option was removed in commit 65e00e04e5ae. A leftover as a dependency broke the config option ACPI_TABLE_OVERRIDE_VIA_ BUILTIN_INITRD that is used to enable the overriding of ACPI tables from built-in initrd. Fixing the dependency. Fixes: 65e00e04e5ae ("initramfs: refactor the initramfs build rules") Signed-off-by: Robert Richter Signed-off-by: Rafael J. Wysocki --- drivers/acpi/Kconfig | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/acpi/Kconfig b/drivers/acpi/Kconfig index 9d872ea477a6c1..8f9940f40baa89 100644 --- a/drivers/acpi/Kconfig +++ b/drivers/acpi/Kconfig @@ -370,7 +370,7 @@ config ACPI_TABLE_UPGRADE config ACPI_TABLE_OVERRIDE_VIA_BUILTIN_INITRD bool "Override ACPI tables from built-in initrd" depends on ACPI_TABLE_UPGRADE - depends on INITRAMFS_SOURCE!="" && INITRAMFS_COMPRESSION="" + depends on INITRAMFS_SOURCE!="" && INITRAMFS_COMPRESSION_NONE help This option provides functionality to override arbitrary ACPI tables from built-in uncompressed initrd. From 71f6428332844f38c7cb10461d9f29e9c9b983a0 Mon Sep 17 00:00:00 2001 From: Andy Shevchenko Date: Mon, 12 Jul 2021 21:21:21 +0300 Subject: [PATCH 529/714] ACPI: utils: Fix reference counting in for_each_acpi_dev_match() Currently it's possible to iterate over the dangling pointer in case the device suddenly disappears. This may happen becase callers put it at the end of a loop. Instead, let's move that call inside acpi_dev_get_next_match_dev(). Fixes: 803abec64ef9 ("media: ipu3-cio2: Add cio2-bridge to ipu3-cio2 driver") Fixes: bf263f64e804 ("media: ACPI / bus: Add acpi_dev_get_next_match_dev() and helper macro") Fixes: edbd1bc4951e ("efi/dev-path-parser: Switch to use for_each_acpi_dev_match()") Signed-off-by: Andy Shevchenko Reviewed-by: Daniel Scally Signed-off-by: Rafael J. Wysocki --- drivers/acpi/utils.c | 7 +++---- drivers/firmware/efi/dev-path-parser.c | 1 - drivers/media/pci/intel/ipu3/cio2-bridge.c | 6 ++---- include/acpi/acpi_bus.h | 5 ----- 4 files changed, 5 insertions(+), 14 deletions(-) diff --git a/drivers/acpi/utils.c b/drivers/acpi/utils.c index e7ddd281afff7f..d5cedffeeff915 100644 --- a/drivers/acpi/utils.c +++ b/drivers/acpi/utils.c @@ -860,11 +860,9 @@ EXPORT_SYMBOL(acpi_dev_present); * Return the next match of ACPI device if another matching device was present * at the moment of invocation, or NULL otherwise. * - * FIXME: The function does not tolerate the sudden disappearance of @adev, e.g. - * in the case of a hotplug event. That said, the caller should ensure that - * this will never happen. - * * The caller is responsible for invoking acpi_dev_put() on the returned device. + * On the other hand the function invokes acpi_dev_put() on the given @adev + * assuming that its reference counter had been increased beforehand. * * See additional information in acpi_dev_present() as well. */ @@ -880,6 +878,7 @@ acpi_dev_get_next_match_dev(struct acpi_device *adev, const char *hid, const cha match.hrv = hrv; dev = bus_find_device(&acpi_bus_type, start, &match, acpi_dev_match_cb); + acpi_dev_put(adev); return dev ? to_acpi_device(dev) : NULL; } EXPORT_SYMBOL(acpi_dev_get_next_match_dev); diff --git a/drivers/firmware/efi/dev-path-parser.c b/drivers/firmware/efi/dev-path-parser.c index 10d4457417a4ff..eb9c65f9784199 100644 --- a/drivers/firmware/efi/dev-path-parser.c +++ b/drivers/firmware/efi/dev-path-parser.c @@ -34,7 +34,6 @@ static long __init parse_acpi_path(const struct efi_dev_path *node, break; if (!adev->pnp.unique_id && node->acpi.uid == 0) break; - acpi_dev_put(adev); } if (!adev) return -ENODEV; diff --git a/drivers/media/pci/intel/ipu3/cio2-bridge.c b/drivers/media/pci/intel/ipu3/cio2-bridge.c index 4657e99df0339e..59a36f92267555 100644 --- a/drivers/media/pci/intel/ipu3/cio2-bridge.c +++ b/drivers/media/pci/intel/ipu3/cio2-bridge.c @@ -173,10 +173,8 @@ static int cio2_bridge_connect_sensor(const struct cio2_sensor_config *cfg, int ret; for_each_acpi_dev_match(adev, cfg->hid, NULL, -1) { - if (!adev->status.enabled) { - acpi_dev_put(adev); + if (!adev->status.enabled) continue; - } if (bridge->n_sensors >= CIO2_NUM_PORTS) { acpi_dev_put(adev); @@ -185,7 +183,6 @@ static int cio2_bridge_connect_sensor(const struct cio2_sensor_config *cfg, } sensor = &bridge->sensors[bridge->n_sensors]; - sensor->adev = adev; strscpy(sensor->name, cfg->hid, sizeof(sensor->name)); ret = cio2_bridge_read_acpi_buffer(adev, "SSDB", @@ -215,6 +212,7 @@ static int cio2_bridge_connect_sensor(const struct cio2_sensor_config *cfg, goto err_free_swnodes; } + sensor->adev = acpi_dev_get(adev); adev->fwnode.secondary = fwnode; dev_info(&cio2->dev, "Found supported sensor %s\n", diff --git a/include/acpi/acpi_bus.h b/include/acpi/acpi_bus.h index 1ae993fee4a5d4..b9d434a936320b 100644 --- a/include/acpi/acpi_bus.h +++ b/include/acpi/acpi_bus.h @@ -707,11 +707,6 @@ acpi_dev_get_first_match_dev(const char *hid, const char *uid, s64 hrv); * @hrv: Hardware Revision of the device, pass -1 to not check _HRV * * The caller is responsible for invoking acpi_dev_put() on the returned device. - * - * FIXME: Due to above requirement there is a window that may invalidate @adev - * and next iteration will use a dangling pointer, e.g. in the case of a - * hotplug event. That said, the caller should ensure that this will never - * happen. */ #define for_each_acpi_dev_match(adev, hid, uid, hrv) \ for (adev = acpi_dev_get_first_match_dev(hid, uid, hrv); \ From c81cfb6256d90ea5ba4a6fb280ea3b171be4e05c Mon Sep 17 00:00:00 2001 From: Kalesh AP Date: Sun, 18 Jul 2021 15:36:25 -0400 Subject: [PATCH 530/714] bnxt_en: don't disable an already disabled PCI device If device is already disabled in reset path and PCI io error is detected before the device could be enabled, driver could call pci_disable_device() for already disabled device. Fix this problem by calling pci_disable_device() only if the device is already enabled. Fixes: 6316ea6db93d ("bnxt_en: Enable AER support.") Signed-off-by: Kalesh AP Signed-off-by: Michael Chan Signed-off-by: David S. Miller --- drivers/net/ethernet/broadcom/bnxt/bnxt.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt.c b/drivers/net/ethernet/broadcom/bnxt/bnxt.c index f56245eeef7b15..fdfb75a1608d7e 100644 --- a/drivers/net/ethernet/broadcom/bnxt/bnxt.c +++ b/drivers/net/ethernet/broadcom/bnxt/bnxt.c @@ -13436,7 +13436,8 @@ static pci_ers_result_t bnxt_io_error_detected(struct pci_dev *pdev, if (netif_running(netdev)) bnxt_close(netdev); - pci_disable_device(pdev); + if (pci_is_enabled(pdev)) + pci_disable_device(pdev); bnxt_free_ctx_mem(bp); kfree(bp->ctx); bp->ctx = NULL; From c08c59653415201ac46ab791c936ae804c45a11b Mon Sep 17 00:00:00 2001 From: Edwin Peer Date: Sun, 18 Jul 2021 15:36:26 -0400 Subject: [PATCH 531/714] bnxt_en: reject ETS settings that will starve a TC ETS proportions are presented to HWRM_QUEUE_COS2BW_CFG as minimum bandwidth constraints. Thus, zero is a legal value for a given TC. However, if all the other TCs sum up to 100%, then at least one hardware queue will starve, resulting in guaranteed TX timeouts. Reject such nonsensical configurations. Reviewed-by: Pavan Chebbi Signed-off-by: Edwin Peer Signed-off-by: Michael Chan Signed-off-by: David S. Miller --- drivers/net/ethernet/broadcom/bnxt/bnxt_dcb.c | 10 +++++++++- 1 file changed, 9 insertions(+), 1 deletion(-) diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt_dcb.c b/drivers/net/ethernet/broadcom/bnxt/bnxt_dcb.c index 8e90224c43a214..8a68df4d9e59cf 100644 --- a/drivers/net/ethernet/broadcom/bnxt/bnxt_dcb.c +++ b/drivers/net/ethernet/broadcom/bnxt/bnxt_dcb.c @@ -433,6 +433,7 @@ static int bnxt_hwrm_queue_dscp2pri_cfg(struct bnxt *bp, struct dcb_app *app, static int bnxt_ets_validate(struct bnxt *bp, struct ieee_ets *ets, u8 *tc) { int total_ets_bw = 0; + bool zero = false; u8 max_tc = 0; int i; @@ -453,13 +454,20 @@ static int bnxt_ets_validate(struct bnxt *bp, struct ieee_ets *ets, u8 *tc) break; case IEEE_8021QAZ_TSA_ETS: total_ets_bw += ets->tc_tx_bw[i]; + zero = zero || !ets->tc_tx_bw[i]; break; default: return -ENOTSUPP; } } - if (total_ets_bw > 100) + if (total_ets_bw > 100) { + netdev_warn(bp->dev, "rejecting ETS config exceeding available bandwidth\n"); return -EINVAL; + } + if (zero && total_ets_bw == 100) { + netdev_warn(bp->dev, "rejecting ETS config starving a TC\n"); + return -EINVAL; + } if (max_tc >= bp->max_tc) *tc = bp->max_tc; From 2c9f046bc377efd1f5e26e74817d5f96e9506c86 Mon Sep 17 00:00:00 2001 From: Michael Chan Date: Sun, 18 Jul 2021 15:36:27 -0400 Subject: [PATCH 532/714] bnxt_en: Refresh RoCE capabilities in bnxt_ulp_probe() The capabilities can change after firmware upgrade/downgrade, so we should get the up-to-date RoCE capabilities everytime bnxt_ulp_probe() is called. Fixes: 2151fe0830fd ("bnxt_en: Handle RESET_NOTIFY async event from firmware.") Reviewed-by: Somnath Kotur Reviewed-by: Edwin Peer Signed-off-by: Michael Chan Signed-off-by: David S. Miller --- drivers/net/ethernet/broadcom/bnxt/bnxt_ulp.c | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt_ulp.c b/drivers/net/ethernet/broadcom/bnxt/bnxt_ulp.c index a918e374f3c5c4..187ff643ad2aea 100644 --- a/drivers/net/ethernet/broadcom/bnxt/bnxt_ulp.c +++ b/drivers/net/ethernet/broadcom/bnxt/bnxt_ulp.c @@ -479,16 +479,17 @@ struct bnxt_en_dev *bnxt_ulp_probe(struct net_device *dev) if (!edev) return ERR_PTR(-ENOMEM); edev->en_ops = &bnxt_en_ops_tbl; - if (bp->flags & BNXT_FLAG_ROCEV1_CAP) - edev->flags |= BNXT_EN_FLAG_ROCEV1_CAP; - if (bp->flags & BNXT_FLAG_ROCEV2_CAP) - edev->flags |= BNXT_EN_FLAG_ROCEV2_CAP; edev->net = dev; edev->pdev = bp->pdev; edev->l2_db_size = bp->db_size; edev->l2_db_size_nc = bp->db_size; bp->edev = edev; } + edev->flags &= ~BNXT_EN_FLAG_ROCE_CAP; + if (bp->flags & BNXT_FLAG_ROCEV1_CAP) + edev->flags |= BNXT_EN_FLAG_ROCEV1_CAP; + if (bp->flags & BNXT_FLAG_ROCEV2_CAP) + edev->flags |= BNXT_EN_FLAG_ROCEV2_CAP; return bp->edev; } EXPORT_SYMBOL(bnxt_ulp_probe); From 6cd657cb3ee6f4de57e635b126ffbe0e51d00f1a Mon Sep 17 00:00:00 2001 From: Michael Chan Date: Sun, 18 Jul 2021 15:36:28 -0400 Subject: [PATCH 533/714] bnxt_en: Add missing check for BNXT_STATE_ABORT_ERR in bnxt_fw_rset_task() In the BNXT_FW_RESET_STATE_POLL_VF state in bnxt_fw_reset_task() after all VFs have unregistered, we need to check for BNXT_STATE_ABORT_ERR after we acquire the rtnl_lock. If the flag is set, we need to abort. Fixes: 230d1f0de754 ("bnxt_en: Handle firmware reset.") Signed-off-by: Michael Chan Signed-off-by: David S. Miller --- drivers/net/ethernet/broadcom/bnxt/bnxt.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt.c b/drivers/net/ethernet/broadcom/bnxt/bnxt.c index fdfb75a1608d7e..39908a3d9460fb 100644 --- a/drivers/net/ethernet/broadcom/bnxt/bnxt.c +++ b/drivers/net/ethernet/broadcom/bnxt/bnxt.c @@ -11992,6 +11992,10 @@ static void bnxt_fw_reset_task(struct work_struct *work) } bp->fw_reset_timestamp = jiffies; rtnl_lock(); + if (test_bit(BNXT_STATE_ABORT_ERR, &bp->state)) { + rtnl_unlock(); + goto fw_reset_abort; + } bnxt_fw_reset_close(bp); if (bp->fw_cap & BNXT_FW_CAP_ERR_RECOVER_RELOAD) { bp->fw_reset_state = BNXT_FW_RESET_STATE_POLL_FW_DOWN; From 3958b1da725a477b4a222183d16a14d85445d4b6 Mon Sep 17 00:00:00 2001 From: Somnath Kotur Date: Sun, 18 Jul 2021 15:36:29 -0400 Subject: [PATCH 534/714] bnxt_en: fix error path of FW reset When bnxt_open() fails in the firmware reset path, the driver needs to gracefully abort, but it is executing code that should be invoked only in the success path. Define a function to abort FW reset and consolidate all error paths to call this new function. Fixes: dab62e7c2de7 ("bnxt_en: Implement faster recovery for firmware fatal error.") Signed-off-by: Somnath Kotur Signed-off-by: Michael Chan Signed-off-by: David S. Miller --- drivers/net/ethernet/broadcom/bnxt/bnxt.c | 31 +++++++++++++++-------- 1 file changed, 21 insertions(+), 10 deletions(-) diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt.c b/drivers/net/ethernet/broadcom/bnxt/bnxt.c index 39908a3d9460fb..f2f1136fd492a6 100644 --- a/drivers/net/ethernet/broadcom/bnxt/bnxt.c +++ b/drivers/net/ethernet/broadcom/bnxt/bnxt.c @@ -11959,10 +11959,21 @@ static bool bnxt_fw_reset_timeout(struct bnxt *bp) (bp->fw_reset_max_dsecs * HZ / 10)); } +static void bnxt_fw_reset_abort(struct bnxt *bp, int rc) +{ + clear_bit(BNXT_STATE_IN_FW_RESET, &bp->state); + if (bp->fw_reset_state != BNXT_FW_RESET_STATE_POLL_VF) { + bnxt_ulp_start(bp, rc); + bnxt_dl_health_status_update(bp, false); + } + bp->fw_reset_state = 0; + dev_close(bp->dev); +} + static void bnxt_fw_reset_task(struct work_struct *work) { struct bnxt *bp = container_of(work, struct bnxt, fw_reset_task.work); - int rc; + int rc = 0; if (!test_bit(BNXT_STATE_IN_FW_RESET, &bp->state)) { netdev_err(bp->dev, "bnxt_fw_reset_task() called when not in fw reset mode!\n"); @@ -11993,8 +12004,9 @@ static void bnxt_fw_reset_task(struct work_struct *work) bp->fw_reset_timestamp = jiffies; rtnl_lock(); if (test_bit(BNXT_STATE_ABORT_ERR, &bp->state)) { + bnxt_fw_reset_abort(bp, rc); rtnl_unlock(); - goto fw_reset_abort; + return; } bnxt_fw_reset_close(bp); if (bp->fw_cap & BNXT_FW_CAP_ERR_RECOVER_RELOAD) { @@ -12043,6 +12055,7 @@ static void bnxt_fw_reset_task(struct work_struct *work) if (val == 0xffff) { if (bnxt_fw_reset_timeout(bp)) { netdev_err(bp->dev, "Firmware reset aborted, PCI config space invalid\n"); + rc = -ETIMEDOUT; goto fw_reset_abort; } bnxt_queue_fw_reset_work(bp, HZ / 1000); @@ -12052,6 +12065,7 @@ static void bnxt_fw_reset_task(struct work_struct *work) clear_bit(BNXT_STATE_FW_FATAL_COND, &bp->state); if (pci_enable_device(bp->pdev)) { netdev_err(bp->dev, "Cannot re-enable PCI device\n"); + rc = -ENODEV; goto fw_reset_abort; } pci_set_master(bp->pdev); @@ -12078,9 +12092,10 @@ static void bnxt_fw_reset_task(struct work_struct *work) } rc = bnxt_open(bp->dev); if (rc) { - netdev_err(bp->dev, "bnxt_open_nic() failed\n"); - clear_bit(BNXT_STATE_IN_FW_RESET, &bp->state); - dev_close(bp->dev); + netdev_err(bp->dev, "bnxt_open() failed during FW reset\n"); + bnxt_fw_reset_abort(bp, rc); + rtnl_unlock(); + return; } bp->fw_reset_state = 0; @@ -12107,12 +12122,8 @@ static void bnxt_fw_reset_task(struct work_struct *work) netdev_err(bp->dev, "fw_health_status 0x%x\n", sts); } fw_reset_abort: - clear_bit(BNXT_STATE_IN_FW_RESET, &bp->state); - if (bp->fw_reset_state != BNXT_FW_RESET_STATE_POLL_VF) - bnxt_dl_health_status_update(bp, false); - bp->fw_reset_state = 0; rtnl_lock(); - dev_close(bp->dev); + bnxt_fw_reset_abort(bp, rc); rtnl_unlock(); } From 96bdd4b9ea7ef9a12db8fdd0ce90e37dffbd3703 Mon Sep 17 00:00:00 2001 From: Michael Chan Date: Sun, 18 Jul 2021 15:36:30 -0400 Subject: [PATCH 535/714] bnxt_en: Validate vlan protocol ID on RX packets Only pass supported VLAN protocol IDs for stripped VLAN tags to the stack. The stack will hit WARN() if the protocol ID is unsupported. Existing firmware sets up the chip to strip 0x8100, 0x88a8, 0x9100. Only the 1st two protocols are supported by the kernel. Fixes: a196e96bb68f ("bnxt_en: clean up VLAN feature bit handling") Reviewed-by: Somnath Kotur Signed-off-by: Michael Chan Signed-off-by: David S. Miller --- drivers/net/ethernet/broadcom/bnxt/bnxt.c | 21 ++++++++++++++++----- 1 file changed, 16 insertions(+), 5 deletions(-) diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt.c b/drivers/net/ethernet/broadcom/bnxt/bnxt.c index f2f1136fd492a6..169f093e01de2e 100644 --- a/drivers/net/ethernet/broadcom/bnxt/bnxt.c +++ b/drivers/net/ethernet/broadcom/bnxt/bnxt.c @@ -1671,11 +1671,16 @@ static inline struct sk_buff *bnxt_tpa_end(struct bnxt *bp, if ((tpa_info->flags2 & RX_CMP_FLAGS2_META_FORMAT_VLAN) && (skb->dev->features & BNXT_HW_FEATURE_VLAN_ALL_RX)) { - u16 vlan_proto = tpa_info->metadata >> - RX_CMP_FLAGS2_METADATA_TPID_SFT; + __be16 vlan_proto = htons(tpa_info->metadata >> + RX_CMP_FLAGS2_METADATA_TPID_SFT); u16 vtag = tpa_info->metadata & RX_CMP_FLAGS2_METADATA_TCI_MASK; - __vlan_hwaccel_put_tag(skb, htons(vlan_proto), vtag); + if (eth_type_vlan(vlan_proto)) { + __vlan_hwaccel_put_tag(skb, vlan_proto, vtag); + } else { + dev_kfree_skb(skb); + return NULL; + } } skb_checksum_none_assert(skb); @@ -1897,9 +1902,15 @@ static int bnxt_rx_pkt(struct bnxt *bp, struct bnxt_cp_ring_info *cpr, (skb->dev->features & BNXT_HW_FEATURE_VLAN_ALL_RX)) { u32 meta_data = le32_to_cpu(rxcmp1->rx_cmp_meta_data); u16 vtag = meta_data & RX_CMP_FLAGS2_METADATA_TCI_MASK; - u16 vlan_proto = meta_data >> RX_CMP_FLAGS2_METADATA_TPID_SFT; + __be16 vlan_proto = htons(meta_data >> + RX_CMP_FLAGS2_METADATA_TPID_SFT); - __vlan_hwaccel_put_tag(skb, htons(vlan_proto), vtag); + if (eth_type_vlan(vlan_proto)) { + __vlan_hwaccel_put_tag(skb, vlan_proto, vtag); + } else { + dev_kfree_skb(skb); + goto next_rx; + } } skb_checksum_none_assert(skb); From 11a39259ff79b74bc99f8b7c44075a2d6d5e7ab1 Mon Sep 17 00:00:00 2001 From: Somnath Kotur Date: Sun, 18 Jul 2021 15:36:31 -0400 Subject: [PATCH 536/714] bnxt_en: Check abort error state in bnxt_half_open_nic() bnxt_half_open_nic() is called during during ethtool self test and is protected by rtnl_lock. Firmware reset can be happening at the same time. Only critical portions of the entire firmware reset sequence are protected by the rtnl_lock. It is possible that bnxt_half_open_nic() can be called when the firmware reset sequence is aborting. In that case, bnxt_half_open_nic() needs to check if the ABORT_ERR flag is set and abort if it is. The ethtool self test will fail but the NIC will be brought to a consistent IF_DOWN state. Without this patch, if bnxt_half_open_nic() were to continue in this error state, it may crash like this: bnxt_en 0000:82:00.1 enp130s0f1np1: FW reset in progress during close, FW reset will be aborted Unable to handle kernel NULL pointer dereference at virtual address 0000000000000000 ... Process ethtool (pid: 333327, stack limit = 0x0000000046476577) Call trace: bnxt_alloc_mem+0x444/0xef0 [bnxt_en] bnxt_half_open_nic+0x24/0xb8 [bnxt_en] bnxt_self_test+0x2dc/0x390 [bnxt_en] ethtool_self_test+0xe0/0x1f8 dev_ethtool+0x1744/0x22d0 dev_ioctl+0x190/0x3e0 sock_ioctl+0x238/0x480 do_vfs_ioctl+0xc4/0x758 ksys_ioctl+0x84/0xb8 __arm64_sys_ioctl+0x28/0x38 el0_svc_handler+0xb0/0x180 el0_svc+0x8/0xc Fixes: a1301f08c5ac ("bnxt_en: Check abort error state in bnxt_open_nic().") Signed-off-by: Somnath Kotur Signed-off-by: Michael Chan Signed-off-by: David S. Miller --- drivers/net/ethernet/broadcom/bnxt/bnxt.c | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt.c b/drivers/net/ethernet/broadcom/bnxt/bnxt.c index 169f093e01de2e..31eb3c00851af5 100644 --- a/drivers/net/ethernet/broadcom/bnxt/bnxt.c +++ b/drivers/net/ethernet/broadcom/bnxt/bnxt.c @@ -10208,6 +10208,12 @@ int bnxt_half_open_nic(struct bnxt *bp) { int rc = 0; + if (test_bit(BNXT_STATE_ABORT_ERR, &bp->state)) { + netdev_err(bp->dev, "A previous firmware reset has not completed, aborting half open\n"); + rc = -ENODEV; + goto half_open_err; + } + rc = bnxt_alloc_mem(bp, false); if (rc) { netdev_err(bp->dev, "bnxt_alloc_mem err: %x\n", rc); From d7859afb6880249039b178fdfb1bef94fd954cf2 Mon Sep 17 00:00:00 2001 From: Michael Chan Date: Sun, 18 Jul 2021 15:36:32 -0400 Subject: [PATCH 537/714] bnxt_en: Move bnxt_ptp_init() to bnxt_open() The device needs to be in ifup state for PTP to function, so move bnxt_ptp_init() to bnxt_open(). This means that the PHC will be registered during bnxt_open(). This also makes firmware reset work correctly. PTP configurations may change after firmware upgrade or downgrade. bnxt_open() will be called after firmware reset, so it will work properly. bnxt_ptp_start() is now incorporated into bnxt_ptp_init(). We now also need to call bnxt_ptp_clear() in bnxt_close(). Fixes: 93cb62d98e9c ("bnxt_en: Enable hardware PTP support") Cc: Richard Cochran Reviewed-by: Pavan Chebbi Signed-off-by: Michael Chan Signed-off-by: David S. Miller --- drivers/net/ethernet/broadcom/bnxt/bnxt.c | 16 +++++++------ drivers/net/ethernet/broadcom/bnxt/bnxt_ptp.c | 24 ++++++------------- drivers/net/ethernet/broadcom/bnxt/bnxt_ptp.h | 1 - 3 files changed, 16 insertions(+), 25 deletions(-) diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt.c b/drivers/net/ethernet/broadcom/bnxt/bnxt.c index 31eb3c00851af5..b8b73c21099503 100644 --- a/drivers/net/ethernet/broadcom/bnxt/bnxt.c +++ b/drivers/net/ethernet/broadcom/bnxt/bnxt.c @@ -10134,7 +10134,6 @@ static int __bnxt_open_nic(struct bnxt *bp, bool irq_re_init, bool link_re_init) } } - bnxt_ptp_start(bp); rc = bnxt_init_nic(bp, irq_re_init); if (rc) { netdev_err(bp->dev, "bnxt_init_nic err: %x\n", rc); @@ -10273,9 +10272,16 @@ static int bnxt_open(struct net_device *dev) rc = bnxt_hwrm_if_change(bp, true); if (rc) return rc; + + if (bnxt_ptp_init(bp)) { + netdev_warn(dev, "PTP initialization failed.\n"); + kfree(bp->ptp_cfg); + bp->ptp_cfg = NULL; + } rc = __bnxt_open_nic(bp, true, true); if (rc) { bnxt_hwrm_if_change(bp, false); + bnxt_ptp_clear(bp); } else { if (test_and_clear_bit(BNXT_STATE_FW_RESET_DET, &bp->state)) { if (!test_bit(BNXT_STATE_IN_FW_RESET, &bp->state)) { @@ -10366,6 +10372,7 @@ static int bnxt_close(struct net_device *dev) { struct bnxt *bp = netdev_priv(dev); + bnxt_ptp_clear(bp); bnxt_hwmon_close(bp); bnxt_close_nic(bp, true, true); bnxt_hwrm_shutdown_link(bp); @@ -11352,6 +11359,7 @@ static void bnxt_fw_reset_close(struct bnxt *bp) bnxt_clear_int_mode(bp); pci_disable_device(bp->pdev); } + bnxt_ptp_clear(bp); __bnxt_close_nic(bp, true, false); bnxt_vf_reps_free(bp); bnxt_clear_int_mode(bp); @@ -12694,7 +12702,6 @@ static void bnxt_remove_one(struct pci_dev *pdev) if (BNXT_PF(bp)) devlink_port_type_clear(&bp->dl_port); - bnxt_ptp_clear(bp); pci_disable_pcie_error_reporting(pdev); unregister_netdev(dev); clear_bit(BNXT_STATE_IN_FW_RESET, &bp->state); @@ -13278,11 +13285,6 @@ static int bnxt_init_one(struct pci_dev *pdev, const struct pci_device_id *ent) rc); } - if (bnxt_ptp_init(bp)) { - netdev_warn(dev, "PTP initialization failed.\n"); - kfree(bp->ptp_cfg); - bp->ptp_cfg = NULL; - } bnxt_inv_fw_health_reg(bp); bnxt_dl_register(bp); diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt_ptp.c b/drivers/net/ethernet/broadcom/bnxt/bnxt_ptp.c index f698b6bd4ff871..9089e7f3fbd456 100644 --- a/drivers/net/ethernet/broadcom/bnxt/bnxt_ptp.c +++ b/drivers/net/ethernet/broadcom/bnxt/bnxt_ptp.c @@ -385,22 +385,6 @@ int bnxt_get_rx_ts_p5(struct bnxt *bp, u64 *ts, u32 pkt_ts) return 0; } -void bnxt_ptp_start(struct bnxt *bp) -{ - struct bnxt_ptp_cfg *ptp = bp->ptp_cfg; - - if (!ptp) - return; - - if (bp->flags & BNXT_FLAG_CHIP_P5) { - spin_lock_bh(&ptp->ptp_lock); - ptp->current_time = bnxt_refclk_read(bp, NULL); - WRITE_ONCE(ptp->old_time, ptp->current_time); - spin_unlock_bh(&ptp->ptp_lock); - ptp_schedule_worker(ptp->ptp_clock, 0); - } -} - static const struct ptp_clock_info bnxt_ptp_caps = { .owner = THIS_MODULE, .name = "bnxt clock", @@ -450,7 +434,13 @@ int bnxt_ptp_init(struct bnxt *bp) bnxt_unmap_ptp_regs(bp); return err; } - + if (bp->flags & BNXT_FLAG_CHIP_P5) { + spin_lock_bh(&ptp->ptp_lock); + ptp->current_time = bnxt_refclk_read(bp, NULL); + WRITE_ONCE(ptp->old_time, ptp->current_time); + spin_unlock_bh(&ptp->ptp_lock); + ptp_schedule_worker(ptp->ptp_clock, 0); + } return 0; } diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt_ptp.h b/drivers/net/ethernet/broadcom/bnxt/bnxt_ptp.h index 6b6245750e206e..4135ea3ec7889b 100644 --- a/drivers/net/ethernet/broadcom/bnxt/bnxt_ptp.h +++ b/drivers/net/ethernet/broadcom/bnxt/bnxt_ptp.h @@ -75,7 +75,6 @@ int bnxt_hwtstamp_set(struct net_device *dev, struct ifreq *ifr); int bnxt_hwtstamp_get(struct net_device *dev, struct ifreq *ifr); int bnxt_get_tx_ts_p5(struct bnxt *bp, struct sk_buff *skb); int bnxt_get_rx_ts_p5(struct bnxt *bp, u64 *ts, u32 pkt_ts); -void bnxt_ptp_start(struct bnxt *bp); int bnxt_ptp_init(struct bnxt *bp); void bnxt_ptp_clear(struct bnxt *bp); #endif From de5bf19414fec860168f05d00d574562bd9d86d1 Mon Sep 17 00:00:00 2001 From: Michael Chan Date: Sun, 18 Jul 2021 15:36:33 -0400 Subject: [PATCH 538/714] bnxt_en: Fix PTP capability discovery The current PTP initialization logic does not account for firmware reset that may cause PTP capability to change. The valid pointer bp->ptp_cfg is used to indicate that the device is capable of PTP and that it has been initialized. So we must clean up bp->ptp_cfg and free it if the firmware after reset does not support PTP. Fixes: 93cb62d98e9c ("bnxt_en: Enable hardware PTP support") Cc: Richard Cochran Reviewed-by: Pavan Chebbi Signed-off-by: Michael Chan Signed-off-by: David S. Miller --- drivers/net/ethernet/broadcom/bnxt/bnxt.c | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt.c b/drivers/net/ethernet/broadcom/bnxt/bnxt.c index b8b73c21099503..4db162cee911e9 100644 --- a/drivers/net/ethernet/broadcom/bnxt/bnxt.c +++ b/drivers/net/ethernet/broadcom/bnxt/bnxt.c @@ -7574,8 +7574,12 @@ static int __bnxt_hwrm_func_qcaps(struct bnxt *bp) bp->flags &= ~BNXT_FLAG_WOL_CAP; if (flags & FUNC_QCAPS_RESP_FLAGS_WOL_MAGICPKT_SUPPORTED) bp->flags |= BNXT_FLAG_WOL_CAP; - if (flags & FUNC_QCAPS_RESP_FLAGS_PTP_SUPPORTED) + if (flags & FUNC_QCAPS_RESP_FLAGS_PTP_SUPPORTED) { __bnxt_hwrm_ptp_qcfg(bp); + } else { + kfree(bp->ptp_cfg); + bp->ptp_cfg = NULL; + } } else { #ifdef CONFIG_BNXT_SRIOV struct bnxt_vf_info *vf = &bp->vf; From b16f3299ae1aa3c327e1fb742d0379ae4d6e86f2 Mon Sep 17 00:00:00 2001 From: Randy Dunlap Date: Sun, 18 Jul 2021 13:38:34 -0700 Subject: [PATCH 539/714] net: hisilicon: rename CACHE_LINE_MASK to avoid redefinition Building on ARCH=arc causes a "redefined" warning, so rename this driver's CACHE_LINE_MASK to avoid the warning. ../drivers/net/ethernet/hisilicon/hip04_eth.c:134: warning: "CACHE_LINE_MASK" redefined 134 | #define CACHE_LINE_MASK 0x3F In file included from ../include/linux/cache.h:6, from ../include/linux/printk.h:9, from ../include/linux/kernel.h:19, from ../include/linux/list.h:9, from ../include/linux/module.h:12, from ../drivers/net/ethernet/hisilicon/hip04_eth.c:7: ../arch/arc/include/asm/cache.h:17: note: this is the location of the previous definition 17 | #define CACHE_LINE_MASK (~(L1_CACHE_BYTES - 1)) Fixes: d413779cdd93 ("net: hisilicon: Add an tx_desc to adapt HI13X1_GMAC") Signed-off-by: Randy Dunlap Cc: Vineet Gupta Cc: Jiangfeng Xiao Cc: "David S. Miller" Cc: Jakub Kicinski Signed-off-by: David S. Miller --- drivers/net/ethernet/hisilicon/hip04_eth.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/drivers/net/ethernet/hisilicon/hip04_eth.c b/drivers/net/ethernet/hisilicon/hip04_eth.c index 12f6c2442a7ad8..e53512f6878afd 100644 --- a/drivers/net/ethernet/hisilicon/hip04_eth.c +++ b/drivers/net/ethernet/hisilicon/hip04_eth.c @@ -131,7 +131,7 @@ /* buf unit size is cache_line_size, which is 64, so the shift is 6 */ #define PPE_BUF_SIZE_SHIFT 6 #define PPE_TX_BUF_HOLD BIT(31) -#define CACHE_LINE_MASK 0x3F +#define SOC_CACHE_LINE_MASK 0x3F #else #define PPE_CFG_QOS_VMID_GRP_SHIFT 8 #define PPE_CFG_RX_CTRL_ALIGN_SHIFT 11 @@ -531,8 +531,8 @@ hip04_mac_start_xmit(struct sk_buff *skb, struct net_device *ndev) #if defined(CONFIG_HI13X1_GMAC) desc->cfg = (__force u32)cpu_to_be32(TX_CLEAR_WB | TX_FINISH_CACHE_INV | TX_RELEASE_TO_PPE | priv->port << TX_POOL_SHIFT); - desc->data_offset = (__force u32)cpu_to_be32(phys & CACHE_LINE_MASK); - desc->send_addr = (__force u32)cpu_to_be32(phys & ~CACHE_LINE_MASK); + desc->data_offset = (__force u32)cpu_to_be32(phys & SOC_CACHE_LINE_MASK); + desc->send_addr = (__force u32)cpu_to_be32(phys & ~SOC_CACHE_LINE_MASK); #else desc->cfg = (__force u32)cpu_to_be32(TX_CLEAR_WB | TX_FINISH_CACHE_INV); desc->send_addr = (__force u32)cpu_to_be32(phys); From bdad810eb97875813a067504424a483aaa309bad Mon Sep 17 00:00:00 2001 From: Joakim Zhang Date: Mon, 19 Jul 2021 15:18:19 +0800 Subject: [PATCH 540/714] dt-bindings: net: snps,dwmac: add missing DWMAC IP version Add missing DWMAC IP version in snps,dwmac.yaml which found by below command, as NXP i.MX8 families support SNPS DWMAC 5.10a IP. $ make ARCH=arm64 CROSS_COMPILE=aarch64-linux-gnu- dt_binding_check DT_SCHEMA_FILES=Documentation/devicetree/bindings/net/nxp,dwmac-imx.yaml Documentation/devicetree/bindings/net/nxp,dwmac-imx.example.dt.yaml: ethernet@30bf0000: compatible: None of ['nxp,imx8mp-dwmac-eqos', 'snps,dwmac-5.10a'] are valid under the given schema Signed-off-by: Joakim Zhang Signed-off-by: David S. Miller --- Documentation/devicetree/bindings/net/snps,dwmac.yaml | 3 +++ 1 file changed, 3 insertions(+) diff --git a/Documentation/devicetree/bindings/net/snps,dwmac.yaml b/Documentation/devicetree/bindings/net/snps,dwmac.yaml index d7652596a09b37..42689b7d03a2ff 100644 --- a/Documentation/devicetree/bindings/net/snps,dwmac.yaml +++ b/Documentation/devicetree/bindings/net/snps,dwmac.yaml @@ -28,6 +28,7 @@ select: - snps,dwmac-4.00 - snps,dwmac-4.10a - snps,dwmac-4.20a + - snps,dwmac-5.10a - snps,dwxgmac - snps,dwxgmac-2.10 @@ -82,6 +83,7 @@ properties: - snps,dwmac-4.00 - snps,dwmac-4.10a - snps,dwmac-4.20a + - snps,dwmac-5.10a - snps,dwxgmac - snps,dwxgmac-2.10 @@ -375,6 +377,7 @@ allOf: - snps,dwmac-4.00 - snps,dwmac-4.10a - snps,dwmac-4.20a + - snps,dwmac-5.10a - snps,dwxgmac - snps,dwxgmac-2.10 - st,spear600-gmac From e314a07ef263916f761b736ded7a30894709dfd7 Mon Sep 17 00:00:00 2001 From: Joakim Zhang Date: Mon, 19 Jul 2021 15:18:20 +0800 Subject: [PATCH 541/714] dt-bindings: net: imx-dwmac: convert imx-dwmac bindings to yaml In order to automate the verification of DT nodes covert imx-dwmac to nxp,dwmac-imx.yaml, and pass below checking. $ make ARCH=arm64 CROSS_COMPILE=aarch64-linux-gnu- dt_binding_check DT_SCHEMA_FILES=Documentation/devicetree/bindings/net/nxp,dwmac-imx.yaml $ make ARCH=arm64 CROSS_COMPILE=aarch64-linux-gnu- dtbs_check DT_SCHEMA_FILES=Documentation/devicetree/bindings/net/nxp,dwmac-imx.yaml Signed-off-by: Joakim Zhang Signed-off-by: David S. Miller --- .../devicetree/bindings/net/imx-dwmac.txt | 56 ----------- .../bindings/net/nxp,dwmac-imx.yaml | 93 +++++++++++++++++++ 2 files changed, 93 insertions(+), 56 deletions(-) delete mode 100644 Documentation/devicetree/bindings/net/imx-dwmac.txt create mode 100644 Documentation/devicetree/bindings/net/nxp,dwmac-imx.yaml diff --git a/Documentation/devicetree/bindings/net/imx-dwmac.txt b/Documentation/devicetree/bindings/net/imx-dwmac.txt deleted file mode 100644 index 921d522fe8d776..00000000000000 --- a/Documentation/devicetree/bindings/net/imx-dwmac.txt +++ /dev/null @@ -1,56 +0,0 @@ -IMX8 glue layer controller, NXP imx8 families support Synopsys MAC 5.10a IP. - -This file documents platform glue layer for IMX. -Please see stmmac.txt for the other unchanged properties. - -The device node has following properties. - -Required properties: -- compatible: Should be "nxp,imx8mp-dwmac-eqos" to select glue layer - and "snps,dwmac-5.10a" to select IP version. -- clocks: Must contain a phandle for each entry in clock-names. -- clock-names: Should be "stmmaceth" for the host clock. - Should be "pclk" for the MAC apb clock. - Should be "ptp_ref" for the MAC timer clock. - Should be "tx" for the MAC RGMII TX clock: - Should be "mem" for EQOS MEM clock. - - "mem" clock is required for imx8dxl platform. - - "mem" clock is not required for imx8mp platform. -- interrupt-names: Should contain a list of interrupt names corresponding to - the interrupts in the interrupts property, if available. - Should be "macirq" for the main MAC IRQ - Should be "eth_wake_irq" for the IT which wake up system -- intf_mode: Should be phandle/offset pair. The phandle to the syscon node which - encompases the GPR register, and the offset of the GPR register. - - required for imx8mp platform. - - is optional for imx8dxl platform. - -Optional properties: -- intf_mode: is optional for imx8dxl platform. -- snps,rmii_refclk_ext: to select RMII reference clock from external. - -Example: - eqos: ethernet@30bf0000 { - compatible = "nxp,imx8mp-dwmac-eqos", "snps,dwmac-5.10a"; - reg = <0x30bf0000 0x10000>; - interrupts = , - ; - interrupt-names = "eth_wake_irq", "macirq"; - clocks = <&clk IMX8MP_CLK_ENET_QOS_ROOT>, - <&clk IMX8MP_CLK_QOS_ENET_ROOT>, - <&clk IMX8MP_CLK_ENET_QOS_TIMER>, - <&clk IMX8MP_CLK_ENET_QOS>; - clock-names = "stmmaceth", "pclk", "ptp_ref", "tx"; - assigned-clocks = <&clk IMX8MP_CLK_ENET_AXI>, - <&clk IMX8MP_CLK_ENET_QOS_TIMER>, - <&clk IMX8MP_CLK_ENET_QOS>; - assigned-clock-parents = <&clk IMX8MP_SYS_PLL1_266M>, - <&clk IMX8MP_SYS_PLL2_100M>, - <&clk IMX8MP_SYS_PLL2_125M>; - assigned-clock-rates = <0>, <100000000>, <125000000>; - nvmem-cells = <ð_mac0>; - nvmem-cell-names = "mac-address"; - nvmem_macaddr_swap; - intf_mode = <&gpr 0x4>; - status = "disabled"; - }; diff --git a/Documentation/devicetree/bindings/net/nxp,dwmac-imx.yaml b/Documentation/devicetree/bindings/net/nxp,dwmac-imx.yaml new file mode 100644 index 00000000000000..5629b2e4ccf89b --- /dev/null +++ b/Documentation/devicetree/bindings/net/nxp,dwmac-imx.yaml @@ -0,0 +1,93 @@ +# SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause) +%YAML 1.2 +--- +$id: http://devicetree.org/schemas/net/nxp,dwmac-imx.yaml# +$schema: http://devicetree.org/meta-schemas/core.yaml# + +title: NXP i.MX8 DWMAC glue layer Device Tree Bindings + +maintainers: + - Joakim Zhang + +# We need a select here so we don't match all nodes with 'snps,dwmac' +select: + properties: + compatible: + contains: + enum: + - nxp,imx8mp-dwmac-eqos + - nxp,imx8dxl-dwmac-eqos + required: + - compatible + +allOf: + - $ref: "snps,dwmac.yaml#" + +properties: + compatible: + oneOf: + - items: + - enum: + - nxp,imx8mp-dwmac-eqos + - nxp,imx8dxl-dwmac-eqos + - const: snps,dwmac-5.10a + + clocks: + minItems: 3 + maxItems: 5 + items: + - description: MAC host clock + - description: MAC apb clock + - description: MAC timer clock + - description: MAC RGMII TX clock + - description: EQOS MEM clock + + clock-names: + minItems: 3 + maxItems: 5 + contains: + enum: + - stmmaceth + - pclk + - ptp_ref + - tx + - mem + + intf_mode: + $ref: /schemas/types.yaml#/definitions/phandle-array + description: + Should be phandle/offset pair. The phandle to the syscon node which + encompases the GPR register, and the offset of the GPR register. + + snps,rmii_refclk_ext: + $ref: /schemas/types.yaml#/definitions/flag + description: + To select RMII reference clock from external. + +required: + - compatible + - clocks + - clock-names + +unevaluatedProperties: false + +examples: + - | + #include + #include + #include + + eqos: ethernet@30bf0000 { + compatible = "nxp,imx8mp-dwmac-eqos","snps,dwmac-5.10a"; + reg = <0x30bf0000 0x10000>; + interrupts = , + ; + interrupt-names = "macirq", "eth_wake_irq"; + clocks = <&clk IMX8MP_CLK_ENET_QOS_ROOT>, + <&clk IMX8MP_CLK_QOS_ENET_ROOT>, + <&clk IMX8MP_CLK_ENET_QOS_TIMER>, + <&clk IMX8MP_CLK_ENET_QOS>; + clock-names = "stmmaceth", "pclk", "ptp_ref", "tx"; + phy-mode = "rgmii"; + status = "disabled"; + }; From 77e5253deadf7fae59207330e3a639e592ee7892 Mon Sep 17 00:00:00 2001 From: Joakim Zhang Date: Mon, 19 Jul 2021 15:18:21 +0800 Subject: [PATCH 542/714] arm64: dts: imx8mp: change interrupt order per dt-binding This patch changs interrupt order which found by dtbs_check. $ make ARCH=arm64 CROSS_COMPILE=aarch64-linux-gnu- dtbs_check DT_SCHEMA_FILES=Documentation/devicetree/bindings/net/nxp,dwmac-imx.yaml arch/arm64/boot/dts/freescale/imx8mp-evk.dt.yaml: ethernet@30bf0000: interrupt-names:0: 'macirq' was expected arch/arm64/boot/dts/freescale/imx8mp-evk.dt.yaml: ethernet@30bf0000: interrupt-names:1: 'eth_wake_irq' was expected According to Documentation/devicetree/bindings/net/snps,dwmac.yaml, we should list interrupt in it's order. Signed-off-by: Joakim Zhang Signed-off-by: David S. Miller --- arch/arm64/boot/dts/freescale/imx8mp.dtsi | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/arch/arm64/boot/dts/freescale/imx8mp.dtsi b/arch/arm64/boot/dts/freescale/imx8mp.dtsi index 9f7c7f587d38bc..ca38d0d6c3c4ac 100644 --- a/arch/arm64/boot/dts/freescale/imx8mp.dtsi +++ b/arch/arm64/boot/dts/freescale/imx8mp.dtsi @@ -821,9 +821,9 @@ eqos: ethernet@30bf0000 { compatible = "nxp,imx8mp-dwmac-eqos", "snps,dwmac-5.10a"; reg = <0x30bf0000 0x10000>; - interrupts = , - ; - interrupt-names = "eth_wake_irq", "macirq"; + interrupts = , + ; + interrupt-names = "macirq", "eth_wake_irq"; clocks = <&clk IMX8MP_CLK_ENET_QOS_ROOT>, <&clk IMX8MP_CLK_QOS_ENET_ROOT>, <&clk IMX8MP_CLK_ENET_QOS_TIMER>, From 6f20c8adb1813467ea52c1296d52c4e95978cb2f Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Mon, 19 Jul 2021 02:12:18 -0700 Subject: [PATCH 543/714] net/tcp_fastopen: fix data races around tfo_active_disable_stamp tfo_active_disable_stamp is read and written locklessly. We need to annotate these accesses appropriately. Then, we need to perform the atomic_inc(tfo_active_disable_times) after the timestamp has been updated, and thus add barriers to make sure tcp_fastopen_active_should_disable() wont read a stale timestamp. Fixes: cf1ef3f0719b ("net/tcp_fastopen: Disable active side TFO in certain scenarios") Signed-off-by: Eric Dumazet Cc: Wei Wang Cc: Yuchung Cheng Cc: Neal Cardwell Acked-by: Wei Wang Signed-off-by: David S. Miller --- net/ipv4/tcp_fastopen.c | 19 ++++++++++++++++--- 1 file changed, 16 insertions(+), 3 deletions(-) diff --git a/net/ipv4/tcp_fastopen.c b/net/ipv4/tcp_fastopen.c index 47c32604d38fca..b32af76e213253 100644 --- a/net/ipv4/tcp_fastopen.c +++ b/net/ipv4/tcp_fastopen.c @@ -507,8 +507,15 @@ void tcp_fastopen_active_disable(struct sock *sk) { struct net *net = sock_net(sk); + /* Paired with READ_ONCE() in tcp_fastopen_active_should_disable() */ + WRITE_ONCE(net->ipv4.tfo_active_disable_stamp, jiffies); + + /* Paired with smp_rmb() in tcp_fastopen_active_should_disable(). + * We want net->ipv4.tfo_active_disable_stamp to be updated first. + */ + smp_mb__before_atomic(); atomic_inc(&net->ipv4.tfo_active_disable_times); - net->ipv4.tfo_active_disable_stamp = jiffies; + NET_INC_STATS(net, LINUX_MIB_TCPFASTOPENBLACKHOLE); } @@ -526,10 +533,16 @@ bool tcp_fastopen_active_should_disable(struct sock *sk) if (!tfo_da_times) return false; + /* Paired with smp_mb__before_atomic() in tcp_fastopen_active_disable() */ + smp_rmb(); + /* Limit timeout to max: 2^6 * initial timeout */ multiplier = 1 << min(tfo_da_times - 1, 6); - timeout = multiplier * tfo_bh_timeout * HZ; - if (time_before(jiffies, sock_net(sk)->ipv4.tfo_active_disable_stamp + timeout)) + + /* Paired with the WRITE_ONCE() in tcp_fastopen_active_disable(). */ + timeout = READ_ONCE(sock_net(sk)->ipv4.tfo_active_disable_stamp) + + multiplier * tfo_bh_timeout * HZ; + if (time_before(jiffies, timeout)) return true; /* Mark check bit so we can check for successful active TFO From fa2bf6baf2b1d8350e5193ce4014bdddc51a25d0 Mon Sep 17 00:00:00 2001 From: Subbaraya Sundeep Date: Mon, 19 Jul 2021 14:29:32 +0530 Subject: [PATCH 544/714] octeontx2-af: Enable transmit side LBK link For enabling VF-VF switching the packets egressing out of CGX mapped VFs needed to be sent to LBK so that same packets are received back to the system. But the LBK link also needs to be enabled in addition to a VF's mapped CGX_LMAC link otherwise hardware raises send error interrupt indicating selected LBK link is not enabled in NIX_AF_TL3_TL2X_LINKX_CFG register. Hence this patch enables all LBK links in TL3_TL2_LINKX_CFG registers. Also to enable packet flow between PFs/VFs of NIX0 to PFs/VFs of NIX1(in 98xx silicon) the NPC TX DMAC rules has to be installed such that rules must be hit for any TX interface i.e., NIX0-TX or NIX1-TX provided DMAC match creteria is met. Hence this patch changes the behavior such that MCAM is programmed to match with any NIX0/1-TX interface for TX rules. Signed-off-by: Subbaraya Sundeep Signed-off-by: Sunil Kovvuri Goutham Signed-off-by: David S. Miller --- .../net/ethernet/marvell/octeontx2/af/rvu.c | 6 ++++ .../net/ethernet/marvell/octeontx2/af/rvu.h | 2 ++ .../ethernet/marvell/octeontx2/af/rvu_nix.c | 32 +++++++++++++++++++ .../ethernet/marvell/octeontx2/af/rvu_npc.c | 15 +++++++-- .../marvell/octeontx2/af/rvu_npc_fs.c | 9 +++++- 5 files changed, 61 insertions(+), 3 deletions(-) diff --git a/drivers/net/ethernet/marvell/octeontx2/af/rvu.c b/drivers/net/ethernet/marvell/octeontx2/af/rvu.c index 10cddf1ac7b9ef..086eb6d283ee43 100644 --- a/drivers/net/ethernet/marvell/octeontx2/af/rvu.c +++ b/drivers/net/ethernet/marvell/octeontx2/af/rvu.c @@ -2859,6 +2859,12 @@ static int rvu_enable_sriov(struct rvu *rvu) if (!vfs) return 0; + /* LBK channel number 63 is used for switching packets between + * CGX mapped VFs. Hence limit LBK pairs till 62 only. + */ + if (vfs > 62) + vfs = 62; + /* Save VFs number for reference in VF interrupts handlers. * Since interrupts might start arriving during SRIOV enablement * ordinary API cannot be used to get number of enabled VFs. diff --git a/drivers/net/ethernet/marvell/octeontx2/af/rvu.h b/drivers/net/ethernet/marvell/octeontx2/af/rvu.h index 10e58a5d5861ac..e53f530e5e3196 100644 --- a/drivers/net/ethernet/marvell/octeontx2/af/rvu.h +++ b/drivers/net/ethernet/marvell/octeontx2/af/rvu.h @@ -415,6 +415,8 @@ struct npc_kpu_profile_adapter { size_t kpus; }; +#define RVU_SWITCH_LBK_CHAN 63 + struct rvu { void __iomem *afreg_base; void __iomem *pfreg_base; diff --git a/drivers/net/ethernet/marvell/octeontx2/af/rvu_nix.c b/drivers/net/ethernet/marvell/octeontx2/af/rvu_nix.c index aeae3770442807..a2d69eaac4f882 100644 --- a/drivers/net/ethernet/marvell/octeontx2/af/rvu_nix.c +++ b/drivers/net/ethernet/marvell/octeontx2/af/rvu_nix.c @@ -1952,6 +1952,35 @@ static void nix_tl1_default_cfg(struct rvu *rvu, struct nix_hw *nix_hw, pfvf_map[schq] = TXSCH_SET_FLAG(pfvf_map[schq], NIX_TXSCHQ_CFG_DONE); } +static void rvu_nix_tx_tl2_cfg(struct rvu *rvu, int blkaddr, + u16 pcifunc, struct nix_txsch *txsch) +{ + struct rvu_hwinfo *hw = rvu->hw; + int lbk_link_start, lbk_links; + u8 pf = rvu_get_pf(pcifunc); + int schq; + + if (!is_pf_cgxmapped(rvu, pf)) + return; + + lbk_link_start = hw->cgx_links; + + for (schq = 0; schq < txsch->schq.max; schq++) { + if (TXSCH_MAP_FUNC(txsch->pfvf_map[schq]) != pcifunc) + continue; + /* Enable all LBK links with channel 63 by default so that + * packets can be sent to LBK with a NPC TX MCAM rule + */ + lbk_links = hw->lbk_links; + while (lbk_links--) + rvu_write64(rvu, blkaddr, + NIX_AF_TL3_TL2X_LINKX_CFG(schq, + lbk_link_start + + lbk_links), + BIT_ULL(12) | RVU_SWITCH_LBK_CHAN); + } +} + int rvu_mbox_handler_nix_txschq_cfg(struct rvu *rvu, struct nix_txschq_config *req, struct msg_rsp *rsp) @@ -2040,6 +2069,9 @@ int rvu_mbox_handler_nix_txschq_cfg(struct rvu *rvu, rvu_write64(rvu, blkaddr, reg, regval); } + rvu_nix_tx_tl2_cfg(rvu, blkaddr, pcifunc, + &nix_hw->txsch[NIX_TXSCH_LVL_TL2]); + return 0; } diff --git a/drivers/net/ethernet/marvell/octeontx2/af/rvu_npc.c b/drivers/net/ethernet/marvell/octeontx2/af/rvu_npc.c index 3612e0a2cab324..16c557cbe6a09b 100644 --- a/drivers/net/ethernet/marvell/octeontx2/af/rvu_npc.c +++ b/drivers/net/ethernet/marvell/octeontx2/af/rvu_npc.c @@ -468,6 +468,8 @@ static void npc_config_mcam_entry(struct rvu *rvu, struct npc_mcam *mcam, { int bank = npc_get_bank(mcam, index); int kw = 0, actbank, actindex; + u8 tx_intf_mask = ~intf & 0x3; + u8 tx_intf = intf; u64 cam0, cam1; actbank = bank; /* Save bank id, to set action later on */ @@ -488,12 +490,21 @@ static void npc_config_mcam_entry(struct rvu *rvu, struct npc_mcam *mcam, */ for (; bank < (actbank + mcam->banks_per_entry); bank++, kw = kw + 2) { /* Interface should be set in all banks */ + if (is_npc_intf_tx(intf)) { + /* Last bit must be set and rest don't care + * for TX interfaces + */ + tx_intf_mask = 0x1; + tx_intf = intf & tx_intf_mask; + tx_intf_mask = ~tx_intf & tx_intf_mask; + } + rvu_write64(rvu, blkaddr, NPC_AF_MCAMEX_BANKX_CAMX_INTF(index, bank, 1), - intf); + tx_intf); rvu_write64(rvu, blkaddr, NPC_AF_MCAMEX_BANKX_CAMX_INTF(index, bank, 0), - ~intf & 0x3); + tx_intf_mask); /* Set the match key */ npc_get_keyword(entry, kw, &cam0, &cam1); diff --git a/drivers/net/ethernet/marvell/octeontx2/af/rvu_npc_fs.c b/drivers/net/ethernet/marvell/octeontx2/af/rvu_npc_fs.c index 68633145a8b809..92d64bdff0ea27 100644 --- a/drivers/net/ethernet/marvell/octeontx2/af/rvu_npc_fs.c +++ b/drivers/net/ethernet/marvell/octeontx2/af/rvu_npc_fs.c @@ -949,9 +949,16 @@ static void npc_update_tx_entry(struct rvu *rvu, struct rvu_pfvf *pfvf, struct npc_install_flow_req *req, u16 target) { struct nix_tx_action action; + u64 mask = ~0ULL; + + /* If AF is installing then do not care about + * PF_FUNC in Send Descriptor + */ + if (is_pffunc_af(req->hdr.pcifunc)) + mask = 0; npc_update_entry(rvu, NPC_PF_FUNC, entry, (__force u16)htons(target), - 0, ~0ULL, 0, NIX_INTF_TX); + 0, mask, 0, NIX_INTF_TX); *(u64 *)&action = 0x00; action.op = req->op; From cb7a6b3bac1d0d773f2b6cc35f6bab61eb5de5ef Mon Sep 17 00:00:00 2001 From: Subbaraya Sundeep Date: Mon, 19 Jul 2021 14:29:33 +0530 Subject: [PATCH 545/714] octeontx2-af: Prepare for allocating MCAM rules for AF AF till now only manages the allocation and freeing of MCAM rules for other PF/VFs in system. To implement L2 switching between all CGX mapped PF and VFs, AF requires MCAM entries for DMAC rules for each PF and VF. This patch modifies AF driver such that AF can also allocate MCAM rules and install rules for other PFs and VFs. All the checks like channel verification for RX rules and PF_FUNC verification for TX rules are relaxed in case AF is allocating or installing rules. Also all the entry and counter to owner mappings are set to NPC_MCAM_INVALID_MAP when they are free indicating those are not allocated to AF nor PF/VFs. This patch also ensures that AF allocated and installed entries are displayed in debugfs. Signed-off-by: Subbaraya Sundeep Signed-off-by: Sunil Kovvuri Goutham Signed-off-by: David S. Miller --- .../marvell/octeontx2/af/rvu_debugfs.c | 5 +-- .../ethernet/marvell/octeontx2/af/rvu_npc.c | 32 +++++++++++++------ .../marvell/octeontx2/af/rvu_npc_fs.c | 11 ++++--- 3 files changed, 30 insertions(+), 18 deletions(-) diff --git a/drivers/net/ethernet/marvell/octeontx2/af/rvu_debugfs.c b/drivers/net/ethernet/marvell/octeontx2/af/rvu_debugfs.c index 370d4ca1e5edbd..9b2dfbf90e5104 100644 --- a/drivers/net/ethernet/marvell/octeontx2/af/rvu_debugfs.c +++ b/drivers/net/ethernet/marvell/octeontx2/af/rvu_debugfs.c @@ -2113,9 +2113,6 @@ static void rvu_print_npc_mcam_info(struct seq_file *s, int entry_acnt, entry_ecnt; int cntr_acnt, cntr_ecnt; - /* Skip PF0 */ - if (!pcifunc) - return; rvu_npc_get_mcam_entry_alloc_info(rvu, pcifunc, blkaddr, &entry_acnt, &entry_ecnt); rvu_npc_get_mcam_counter_alloc_info(rvu, pcifunc, blkaddr, @@ -2298,7 +2295,7 @@ static void rvu_dbg_npc_mcam_show_flows(struct seq_file *s, static void rvu_dbg_npc_mcam_show_action(struct seq_file *s, struct rvu_npc_mcam_rule *rule) { - if (rule->intf == NIX_INTF_TX) { + if (is_npc_intf_tx(rule->intf)) { switch (rule->tx_action.op) { case NIX_TX_ACTIONOP_DROP: seq_puts(s, "\taction: Drop\n"); diff --git a/drivers/net/ethernet/marvell/octeontx2/af/rvu_npc.c b/drivers/net/ethernet/marvell/octeontx2/af/rvu_npc.c index 16c557cbe6a09b..1097291aaa453a 100644 --- a/drivers/net/ethernet/marvell/octeontx2/af/rvu_npc.c +++ b/drivers/net/ethernet/marvell/octeontx2/af/rvu_npc.c @@ -442,7 +442,8 @@ static void npc_fixup_vf_rule(struct rvu *rvu, struct npc_mcam *mcam, owner = mcam->entry2pfvf_map[index]; target_func = (entry->action >> 4) & 0xffff; /* do nothing when target is LBK/PF or owner is not PF */ - if (is_afvf(target_func) || (owner & RVU_PFVF_FUNC_MASK) || + if (is_pffunc_af(owner) || is_afvf(target_func) || + (owner & RVU_PFVF_FUNC_MASK) || !(target_func & RVU_PFVF_FUNC_MASK)) return; @@ -661,6 +662,7 @@ void rvu_npc_install_ucast_entry(struct rvu *rvu, u16 pcifunc, eth_broadcast_addr((u8 *)&req.mask.dmac); req.features = BIT_ULL(NPC_DMAC); req.channel = chan; + req.chan_mask = 0xFFFU; req.intf = pfvf->nix_rx_intf; req.op = action.op; req.hdr.pcifunc = 0; /* AF is requester */ @@ -810,6 +812,7 @@ void rvu_npc_install_bcast_match_entry(struct rvu *rvu, u16 pcifunc, eth_broadcast_addr((u8 *)&req.mask.dmac); req.features = BIT_ULL(NPC_DMAC); req.channel = chan; + req.chan_mask = 0xFFFU; req.intf = pfvf->nix_rx_intf; req.entry = index; req.hdr.pcifunc = 0; /* AF is requester */ @@ -1756,6 +1759,8 @@ static int npc_mcam_rsrcs_init(struct rvu *rvu, int blkaddr) int nixlf_count = rvu_get_nixlf_count(rvu); struct npc_mcam *mcam = &rvu->hw->mcam; int rsvd, err; + u16 index; + int cntr; u64 cfg; /* Actual number of MCAM entries vary by entry size */ @@ -1856,6 +1861,14 @@ static int npc_mcam_rsrcs_init(struct rvu *rvu, int blkaddr) if (!mcam->entry2target_pffunc) goto free_mem; + for (index = 0; index < mcam->bmap_entries; index++) { + mcam->entry2pfvf_map[index] = NPC_MCAM_INVALID_MAP; + mcam->entry2cntr_map[index] = NPC_MCAM_INVALID_MAP; + } + + for (cntr = 0; cntr < mcam->counters.max; cntr++) + mcam->cntr2pfvf_map[cntr] = NPC_MCAM_INVALID_MAP; + mutex_init(&mcam->lock); return 0; @@ -2573,7 +2586,7 @@ int rvu_mbox_handler_npc_mcam_alloc_entry(struct rvu *rvu, } /* Alloc request from PFFUNC with no NIXLF attached should be denied */ - if (!is_nixlf_attached(rvu, pcifunc)) + if (!is_pffunc_af(pcifunc) && !is_nixlf_attached(rvu, pcifunc)) return NPC_MCAM_ALLOC_DENIED; return npc_mcam_alloc_entries(mcam, pcifunc, req, rsp); @@ -2593,7 +2606,7 @@ int rvu_mbox_handler_npc_mcam_free_entry(struct rvu *rvu, return NPC_MCAM_INVALID_REQ; /* Free request from PFFUNC with no NIXLF attached, ignore */ - if (!is_nixlf_attached(rvu, pcifunc)) + if (!is_pffunc_af(pcifunc) && !is_nixlf_attached(rvu, pcifunc)) return NPC_MCAM_INVALID_REQ; mutex_lock(&mcam->lock); @@ -2605,7 +2618,7 @@ int rvu_mbox_handler_npc_mcam_free_entry(struct rvu *rvu, if (rc) goto exit; - mcam->entry2pfvf_map[req->entry] = 0; + mcam->entry2pfvf_map[req->entry] = NPC_MCAM_INVALID_MAP; mcam->entry2target_pffunc[req->entry] = 0x0; npc_mcam_clear_bit(mcam, req->entry); npc_enable_mcam_entry(rvu, mcam, blkaddr, req->entry, false); @@ -2690,13 +2703,14 @@ int rvu_mbox_handler_npc_mcam_write_entry(struct rvu *rvu, else nix_intf = pfvf->nix_rx_intf; - if (npc_mcam_verify_channel(rvu, pcifunc, req->intf, channel)) { + if (!is_pffunc_af(pcifunc) && + npc_mcam_verify_channel(rvu, pcifunc, req->intf, channel)) { rc = NPC_MCAM_INVALID_REQ; goto exit; } - if (npc_mcam_verify_pf_func(rvu, &req->entry_data, req->intf, - pcifunc)) { + if (!is_pffunc_af(pcifunc) && + npc_mcam_verify_pf_func(rvu, &req->entry_data, req->intf, pcifunc)) { rc = NPC_MCAM_INVALID_REQ; goto exit; } @@ -2847,7 +2861,7 @@ int rvu_mbox_handler_npc_mcam_alloc_counter(struct rvu *rvu, return NPC_MCAM_INVALID_REQ; /* If the request is from a PFFUNC with no NIXLF attached, ignore */ - if (!is_nixlf_attached(rvu, pcifunc)) + if (!is_pffunc_af(pcifunc) && !is_nixlf_attached(rvu, pcifunc)) return NPC_MCAM_INVALID_REQ; /* Since list of allocated counter IDs needs to be sent to requester, @@ -3092,7 +3106,7 @@ int rvu_mbox_handler_npc_mcam_alloc_and_write_entry(struct rvu *rvu, if (rc) { /* Free allocated MCAM entry */ mutex_lock(&mcam->lock); - mcam->entry2pfvf_map[entry] = 0; + mcam->entry2pfvf_map[entry] = NPC_MCAM_INVALID_MAP; npc_mcam_clear_bit(mcam, entry); mutex_unlock(&mcam->lock); return rc; diff --git a/drivers/net/ethernet/marvell/octeontx2/af/rvu_npc_fs.c b/drivers/net/ethernet/marvell/octeontx2/af/rvu_npc_fs.c index 92d64bdff0ea27..c1f35a0971ad01 100644 --- a/drivers/net/ethernet/marvell/octeontx2/af/rvu_npc_fs.c +++ b/drivers/net/ethernet/marvell/octeontx2/af/rvu_npc_fs.c @@ -913,11 +913,9 @@ static void npc_update_rx_entry(struct rvu *rvu, struct rvu_pfvf *pfvf, struct npc_install_flow_req *req, u16 target) { struct nix_rx_action action; - u64 chan_mask; - chan_mask = req->chan_mask ? req->chan_mask : ~0ULL; - npc_update_entry(rvu, NPC_CHAN, entry, req->channel, 0, chan_mask, 0, - NIX_INTF_RX); + npc_update_entry(rvu, NPC_CHAN, entry, req->channel, 0, req->chan_mask, + 0, NIX_INTF_RX); *(u64 *)&action = 0x00; action.pf_func = target; @@ -1171,7 +1169,9 @@ int rvu_mbox_handler_npc_install_flow(struct rvu *rvu, if (err) return err; - if (npc_mcam_verify_channel(rvu, target, req->intf, req->channel)) + /* Skip channel validation if AF is installing */ + if (!is_pffunc_af(req->hdr.pcifunc) && + npc_mcam_verify_channel(rvu, target, req->intf, req->channel)) return -EINVAL; pfvf = rvu_get_pfvf(rvu, target); @@ -1187,6 +1187,7 @@ int rvu_mbox_handler_npc_install_flow(struct rvu *rvu, eth_broadcast_addr((u8 *)&req->mask.dmac); } + /* Proceed if NIXLF is attached or not for TX rules */ err = nix_get_nixlf(rvu, target, &nixlf, NULL); if (err && is_npc_intf_rx(req->intf) && !pf_set_vfs_mac) return -EINVAL; From 23109f8dd06d0bd04c9360cf7c501c97b0ab1545 Mon Sep 17 00:00:00 2001 From: Subbaraya Sundeep Date: Mon, 19 Jul 2021 14:29:34 +0530 Subject: [PATCH 546/714] octeontx2-af: Introduce internal packet switching As of now any communication between CGXs PFs and their VFs within the system is possible only by external switches sending packets back to the system. This patch adds internal switching support. Broadcast packet replication is not covered here. RVU admin function (AF) maintains MAC addresses of all interfaces in the system. When switching is enabled, MCAM entries are allocated to install rules such that packets with DMAC matching any of the internal interface MAC addresses is punted back into the system via the loopback channel. On the receive side the default unicast rules are modified to not check for ingress channel. So any packet with matching DMAC irrespective of which interface it is coming from will be forwarded to the respective PF/VF interface. The transmit side rules and default unicast rules are updated if user changes MAC address of an interface. Signed-off-by: Subbaraya Sundeep Signed-off-by: Sunil Kovvuri Goutham Signed-off-by: David S. Miller --- .../ethernet/marvell/octeontx2/af/Makefile | 2 +- .../net/ethernet/marvell/octeontx2/af/rvu.c | 4 +- .../net/ethernet/marvell/octeontx2/af/rvu.h | 19 ++ .../ethernet/marvell/octeontx2/af/rvu_cgx.c | 3 + .../marvell/octeontx2/af/rvu_devlink.c | 48 +++- .../ethernet/marvell/octeontx2/af/rvu_nix.c | 4 + .../marvell/octeontx2/af/rvu_npc_fs.c | 9 +- .../marvell/octeontx2/af/rvu_switch.c | 258 ++++++++++++++++++ 8 files changed, 336 insertions(+), 11 deletions(-) create mode 100644 drivers/net/ethernet/marvell/octeontx2/af/rvu_switch.c diff --git a/drivers/net/ethernet/marvell/octeontx2/af/Makefile b/drivers/net/ethernet/marvell/octeontx2/af/Makefile index 1a3455620b381f..cc8ac36cf687de 100644 --- a/drivers/net/ethernet/marvell/octeontx2/af/Makefile +++ b/drivers/net/ethernet/marvell/octeontx2/af/Makefile @@ -10,4 +10,4 @@ obj-$(CONFIG_OCTEONTX2_AF) += rvu_af.o rvu_mbox-y := mbox.o rvu_trace.o rvu_af-y := cgx.o rvu.o rvu_cgx.o rvu_npa.o rvu_nix.o \ rvu_reg.o rvu_npc.o rvu_debugfs.o ptp.o rvu_npc_fs.o \ - rvu_cpt.o rvu_devlink.o rpm.o rvu_cn10k.o + rvu_cpt.o rvu_devlink.o rpm.o rvu_cn10k.o rvu_switch.o diff --git a/drivers/net/ethernet/marvell/octeontx2/af/rvu.c b/drivers/net/ethernet/marvell/octeontx2/af/rvu.c index 086eb6d283ee43..017163fb3cd5cc 100644 --- a/drivers/net/ethernet/marvell/octeontx2/af/rvu.c +++ b/drivers/net/ethernet/marvell/octeontx2/af/rvu.c @@ -1314,7 +1314,7 @@ int rvu_mbox_handler_detach_resources(struct rvu *rvu, return rvu_detach_rsrcs(rvu, detach, detach->hdr.pcifunc); } -static int rvu_get_nix_blkaddr(struct rvu *rvu, u16 pcifunc) +int rvu_get_nix_blkaddr(struct rvu *rvu, u16 pcifunc) { struct rvu_pfvf *pfvf = rvu_get_pfvf(rvu, pcifunc); int blkaddr = BLKADDR_NIX0, vf; @@ -3007,6 +3007,8 @@ static int rvu_probe(struct pci_dev *pdev, const struct pci_device_id *id) /* Initialize debugfs */ rvu_dbg_init(rvu); + mutex_init(&rvu->rswitch.switch_lock); + return 0; err_dl: rvu_unregister_dl(rvu); diff --git a/drivers/net/ethernet/marvell/octeontx2/af/rvu.h b/drivers/net/ethernet/marvell/octeontx2/af/rvu.h index e53f530e5e3196..91503fb2762c9b 100644 --- a/drivers/net/ethernet/marvell/octeontx2/af/rvu.h +++ b/drivers/net/ethernet/marvell/octeontx2/af/rvu.h @@ -417,6 +417,14 @@ struct npc_kpu_profile_adapter { #define RVU_SWITCH_LBK_CHAN 63 +struct rvu_switch { + struct mutex switch_lock; /* Serialize flow installation */ + u32 used_entries; + u16 *entry2pcifunc; + u16 mode; + u16 start_entry; +}; + struct rvu { void __iomem *afreg_base; void __iomem *pfreg_base; @@ -447,6 +455,7 @@ struct rvu { /* CGX */ #define PF_CGXMAP_BASE 1 /* PF 0 is reserved for RVU PF */ + u16 cgx_mapped_vfs; /* maximum CGX mapped VFs */ u8 cgx_mapped_pfs; u8 cgx_cnt_max; /* CGX port count max */ u8 *pf2cgxlmac_map; /* pf to cgx_lmac map */ @@ -479,6 +488,9 @@ struct rvu { struct rvu_debugfs rvu_dbg; #endif struct rvu_devlink *rvu_dl; + + /* RVU switch implementation over NPC with DMAC rules */ + struct rvu_switch rswitch; }; static inline void rvu_write64(struct rvu *rvu, u64 block, u64 offset, u64 val) @@ -693,6 +705,7 @@ int nix_aq_context_read(struct rvu *rvu, struct nix_hw *nix_hw, struct nix_cn10k_aq_enq_req *aq_req, struct nix_cn10k_aq_enq_rsp *aq_rsp, u16 pcifunc, u8 ctype, u32 qidx); +int rvu_get_nix_blkaddr(struct rvu *rvu, u16 pcifunc); /* NPC APIs */ int rvu_npc_init(struct rvu *rvu); @@ -770,4 +783,10 @@ void rvu_dbg_exit(struct rvu *rvu); static inline void rvu_dbg_init(struct rvu *rvu) {} static inline void rvu_dbg_exit(struct rvu *rvu) {} #endif + +/* RVU Switch */ +void rvu_switch_enable(struct rvu *rvu); +void rvu_switch_disable(struct rvu *rvu); +void rvu_switch_update_rules(struct rvu *rvu, u16 pcifunc); + #endif /* RVU_H */ diff --git a/drivers/net/ethernet/marvell/octeontx2/af/rvu_cgx.c b/drivers/net/ethernet/marvell/octeontx2/af/rvu_cgx.c index 6cc8fbb7190cd6..fe99ac4a4dd804 100644 --- a/drivers/net/ethernet/marvell/octeontx2/af/rvu_cgx.c +++ b/drivers/net/ethernet/marvell/octeontx2/af/rvu_cgx.c @@ -126,6 +126,7 @@ static int rvu_map_cgx_lmac_pf(struct rvu *rvu) unsigned long lmac_bmap; int size, free_pkind; int cgx, lmac, iter; + int numvfs, hwvfs; if (!cgx_cnt_max) return 0; @@ -166,6 +167,8 @@ static int rvu_map_cgx_lmac_pf(struct rvu *rvu) pkind->pfchan_map[free_pkind] = ((pf) & 0x3F) << 16; rvu_map_cgx_nix_block(rvu, pf, cgx, lmac); rvu->cgx_mapped_pfs++; + rvu_get_pf_numvfs(rvu, pf, &numvfs, &hwvfs); + rvu->cgx_mapped_vfs += numvfs; pf++; } } diff --git a/drivers/net/ethernet/marvell/octeontx2/af/rvu_devlink.c b/drivers/net/ethernet/marvell/octeontx2/af/rvu_devlink.c index 10a98bcb7c54e9..2688186066d94c 100644 --- a/drivers/net/ethernet/marvell/octeontx2/af/rvu_devlink.c +++ b/drivers/net/ethernet/marvell/octeontx2/af/rvu_devlink.c @@ -1364,6 +1364,44 @@ static void rvu_health_reporters_destroy(struct rvu *rvu) rvu_nix_health_reporters_destroy(rvu_dl); } +static int rvu_devlink_eswitch_mode_get(struct devlink *devlink, u16 *mode) +{ + struct rvu_devlink *rvu_dl = devlink_priv(devlink); + struct rvu *rvu = rvu_dl->rvu; + struct rvu_switch *rswitch; + + rswitch = &rvu->rswitch; + *mode = rswitch->mode; + + return 0; +} + +static int rvu_devlink_eswitch_mode_set(struct devlink *devlink, u16 mode, + struct netlink_ext_ack *extack) +{ + struct rvu_devlink *rvu_dl = devlink_priv(devlink); + struct rvu *rvu = rvu_dl->rvu; + struct rvu_switch *rswitch; + + rswitch = &rvu->rswitch; + switch (mode) { + case DEVLINK_ESWITCH_MODE_LEGACY: + case DEVLINK_ESWITCH_MODE_SWITCHDEV: + if (rswitch->mode == mode) + return 0; + rswitch->mode = mode; + if (mode == DEVLINK_ESWITCH_MODE_SWITCHDEV) + rvu_switch_enable(rvu); + else + rvu_switch_disable(rvu); + break; + default: + return -EINVAL; + } + + return 0; +} + static int rvu_devlink_info_get(struct devlink *devlink, struct devlink_info_req *req, struct netlink_ext_ack *extack) { @@ -1372,6 +1410,8 @@ static int rvu_devlink_info_get(struct devlink *devlink, struct devlink_info_req static const struct devlink_ops rvu_devlink_ops = { .info_get = rvu_devlink_info_get, + .eswitch_mode_get = rvu_devlink_eswitch_mode_get, + .eswitch_mode_set = rvu_devlink_eswitch_mode_set, }; int rvu_register_dl(struct rvu *rvu) @@ -1380,14 +1420,9 @@ int rvu_register_dl(struct rvu *rvu) struct devlink *dl; int err; - rvu_dl = kzalloc(sizeof(*rvu_dl), GFP_KERNEL); - if (!rvu_dl) - return -ENOMEM; - dl = devlink_alloc(&rvu_devlink_ops, sizeof(struct rvu_devlink)); if (!dl) { dev_warn(rvu->dev, "devlink_alloc failed\n"); - kfree(rvu_dl); return -ENOMEM; } @@ -1395,10 +1430,10 @@ int rvu_register_dl(struct rvu *rvu) if (err) { dev_err(rvu->dev, "devlink register failed with error %d\n", err); devlink_free(dl); - kfree(rvu_dl); return err; } + rvu_dl = devlink_priv(dl); rvu_dl->dl = dl; rvu_dl->rvu = rvu; rvu->rvu_dl = rvu_dl; @@ -1417,5 +1452,4 @@ void rvu_unregister_dl(struct rvu *rvu) rvu_health_reporters_destroy(rvu); devlink_unregister(dl); devlink_free(dl); - kfree(rvu_dl); } diff --git a/drivers/net/ethernet/marvell/octeontx2/af/rvu_nix.c b/drivers/net/ethernet/marvell/octeontx2/af/rvu_nix.c index a2d69eaac4f882..0933699a0d2d7d 100644 --- a/drivers/net/ethernet/marvell/octeontx2/af/rvu_nix.c +++ b/drivers/net/ethernet/marvell/octeontx2/af/rvu_nix.c @@ -3212,6 +3212,8 @@ int rvu_mbox_handler_nix_set_mac_addr(struct rvu *rvu, if (test_bit(PF_SET_VF_TRUSTED, &pfvf->flags) && from_vf) ether_addr_copy(pfvf->default_mac, req->mac_addr); + rvu_switch_update_rules(rvu, pcifunc); + return 0; } @@ -3881,6 +3883,8 @@ int rvu_mbox_handler_nix_lf_start_rx(struct rvu *rvu, struct msg_req *req, pfvf = rvu_get_pfvf(rvu, pcifunc); set_bit(NIXLF_INITIALIZED, &pfvf->flags); + rvu_switch_update_rules(rvu, pcifunc); + return rvu_cgx_start_stop_io(rvu, pcifunc, true); } diff --git a/drivers/net/ethernet/marvell/octeontx2/af/rvu_npc_fs.c b/drivers/net/ethernet/marvell/octeontx2/af/rvu_npc_fs.c index c1f35a0971ad01..5c01cf4a9c5bb0 100644 --- a/drivers/net/ethernet/marvell/octeontx2/af/rvu_npc_fs.c +++ b/drivers/net/ethernet/marvell/octeontx2/af/rvu_npc_fs.c @@ -910,10 +910,15 @@ static void rvu_mcam_add_counter_to_rule(struct rvu *rvu, u16 pcifunc, static void npc_update_rx_entry(struct rvu *rvu, struct rvu_pfvf *pfvf, struct mcam_entry *entry, - struct npc_install_flow_req *req, u16 target) + struct npc_install_flow_req *req, + u16 target, bool pf_set_vfs_mac) { + struct rvu_switch *rswitch = &rvu->rswitch; struct nix_rx_action action; + if (rswitch->mode == DEVLINK_ESWITCH_MODE_SWITCHDEV && pf_set_vfs_mac) + req->chan_mask = 0x0; /* Do not care channel */ + npc_update_entry(rvu, NPC_CHAN, entry, req->channel, 0, req->chan_mask, 0, NIX_INTF_RX); @@ -1007,7 +1012,7 @@ static int npc_install_flow(struct rvu *rvu, int blkaddr, u16 target, req->intf); if (is_npc_intf_rx(req->intf)) - npc_update_rx_entry(rvu, pfvf, entry, req, target); + npc_update_rx_entry(rvu, pfvf, entry, req, target, pf_set_vfs_mac); else npc_update_tx_entry(rvu, pfvf, entry, req, target); diff --git a/drivers/net/ethernet/marvell/octeontx2/af/rvu_switch.c b/drivers/net/ethernet/marvell/octeontx2/af/rvu_switch.c new file mode 100644 index 00000000000000..2e5379710aa5a8 --- /dev/null +++ b/drivers/net/ethernet/marvell/octeontx2/af/rvu_switch.c @@ -0,0 +1,258 @@ +// SPDX-License-Identifier: GPL-2.0 +/* Marvell OcteonTx2 RVU Admin Function driver + * + * Copyright (C) 2021 Marvell. + */ + +#include +#include "rvu.h" + +static int rvu_switch_install_rx_rule(struct rvu *rvu, u16 pcifunc, + u16 chan_mask) +{ + struct npc_install_flow_req req = { 0 }; + struct npc_install_flow_rsp rsp = { 0 }; + struct rvu_pfvf *pfvf; + + pfvf = rvu_get_pfvf(rvu, pcifunc); + /* If the pcifunc is not initialized then nothing to do. + * This same function will be called again via rvu_switch_update_rules + * after pcifunc is initialized. + */ + if (!test_bit(NIXLF_INITIALIZED, &pfvf->flags)) + return 0; + + ether_addr_copy(req.packet.dmac, pfvf->mac_addr); + eth_broadcast_addr((u8 *)&req.mask.dmac); + req.hdr.pcifunc = 0; /* AF is requester */ + req.vf = pcifunc; + req.features = BIT_ULL(NPC_DMAC); + req.channel = pfvf->rx_chan_base; + req.chan_mask = chan_mask; + req.intf = pfvf->nix_rx_intf; + req.op = NIX_RX_ACTION_DEFAULT; + req.default_rule = 1; + + return rvu_mbox_handler_npc_install_flow(rvu, &req, &rsp); +} + +static int rvu_switch_install_tx_rule(struct rvu *rvu, u16 pcifunc, u16 entry) +{ + struct npc_install_flow_req req = { 0 }; + struct npc_install_flow_rsp rsp = { 0 }; + struct rvu_pfvf *pfvf; + u8 lbkid; + + pfvf = rvu_get_pfvf(rvu, pcifunc); + /* If the pcifunc is not initialized then nothing to do. + * This same function will be called again via rvu_switch_update_rules + * after pcifunc is initialized. + */ + if (!test_bit(NIXLF_INITIALIZED, &pfvf->flags)) + return 0; + + lbkid = pfvf->nix_blkaddr == BLKADDR_NIX0 ? 0 : 1; + ether_addr_copy(req.packet.dmac, pfvf->mac_addr); + eth_broadcast_addr((u8 *)&req.mask.dmac); + req.hdr.pcifunc = 0; /* AF is requester */ + req.vf = pcifunc; + req.entry = entry; + req.features = BIT_ULL(NPC_DMAC); + req.intf = pfvf->nix_tx_intf; + req.op = NIX_TX_ACTIONOP_UCAST_CHAN; + req.index = (lbkid << 8) | RVU_SWITCH_LBK_CHAN; + req.set_cntr = 1; + + return rvu_mbox_handler_npc_install_flow(rvu, &req, &rsp); +} + +static int rvu_switch_install_rules(struct rvu *rvu) +{ + struct rvu_switch *rswitch = &rvu->rswitch; + u16 start = rswitch->start_entry; + struct rvu_hwinfo *hw = rvu->hw; + int pf, vf, numvfs, hwvf; + u16 pcifunc, entry = 0; + int err; + + for (pf = 1; pf < hw->total_pfs; pf++) { + if (!is_pf_cgxmapped(rvu, pf)) + continue; + + pcifunc = pf << 10; + /* rvu_get_nix_blkaddr sets up the corresponding NIX block + * address and NIX RX and TX interfaces for a pcifunc. + * Generally it is called during attach call of a pcifunc but it + * is called here since we are pre-installing rules before + * nixlfs are attached + */ + rvu_get_nix_blkaddr(rvu, pcifunc); + + /* MCAM RX rule for a PF/VF already exists as default unicast + * rules installed by AF. Hence change the channel in those + * rules to ignore channel so that packets with the required + * DMAC received from LBK(by other PF/VFs in system) or from + * external world (from wire) are accepted. + */ + err = rvu_switch_install_rx_rule(rvu, pcifunc, 0x0); + if (err) { + dev_err(rvu->dev, "RX rule for PF%d failed(%d)\n", + pf, err); + return err; + } + + err = rvu_switch_install_tx_rule(rvu, pcifunc, start + entry); + if (err) { + dev_err(rvu->dev, "TX rule for PF%d failed(%d)\n", + pf, err); + return err; + } + + rswitch->entry2pcifunc[entry++] = pcifunc; + + rvu_get_pf_numvfs(rvu, pf, &numvfs, &hwvf); + for (vf = 0; vf < numvfs; vf++, hwvf++) { + pcifunc = pf << 10 | ((vf + 1) & 0x3FF); + rvu_get_nix_blkaddr(rvu, pcifunc); + + err = rvu_switch_install_rx_rule(rvu, pcifunc, 0x0); + if (err) { + dev_err(rvu->dev, + "RX rule for PF%dVF%d failed(%d)\n", + pf, vf, err); + return err; + } + + err = rvu_switch_install_tx_rule(rvu, pcifunc, + start + entry); + if (err) { + dev_err(rvu->dev, + "TX rule for PF%dVF%d failed(%d)\n", + pf, vf, err); + return err; + } + + rswitch->entry2pcifunc[entry++] = pcifunc; + } + } + + return 0; +} + +void rvu_switch_enable(struct rvu *rvu) +{ + struct npc_mcam_alloc_entry_req alloc_req = { 0 }; + struct npc_mcam_alloc_entry_rsp alloc_rsp = { 0 }; + struct npc_delete_flow_req uninstall_req = { 0 }; + struct npc_mcam_free_entry_req free_req = { 0 }; + struct rvu_switch *rswitch = &rvu->rswitch; + struct msg_rsp rsp; + int ret; + + alloc_req.contig = true; + alloc_req.count = rvu->cgx_mapped_pfs + rvu->cgx_mapped_vfs; + ret = rvu_mbox_handler_npc_mcam_alloc_entry(rvu, &alloc_req, + &alloc_rsp); + if (ret) { + dev_err(rvu->dev, + "Unable to allocate MCAM entries\n"); + goto exit; + } + + if (alloc_rsp.count != alloc_req.count) { + dev_err(rvu->dev, + "Unable to allocate %d MCAM entries, got %d\n", + alloc_req.count, alloc_rsp.count); + goto free_entries; + } + + rswitch->entry2pcifunc = kcalloc(alloc_req.count, sizeof(u16), + GFP_KERNEL); + if (!rswitch->entry2pcifunc) + goto free_entries; + + rswitch->used_entries = alloc_rsp.count; + rswitch->start_entry = alloc_rsp.entry; + + ret = rvu_switch_install_rules(rvu); + if (ret) + goto uninstall_rules; + + return; + +uninstall_rules: + uninstall_req.start = rswitch->start_entry; + uninstall_req.end = rswitch->start_entry + rswitch->used_entries - 1; + rvu_mbox_handler_npc_delete_flow(rvu, &uninstall_req, &rsp); + kfree(rswitch->entry2pcifunc); +free_entries: + free_req.all = 1; + rvu_mbox_handler_npc_mcam_free_entry(rvu, &free_req, &rsp); +exit: + return; +} + +void rvu_switch_disable(struct rvu *rvu) +{ + struct npc_delete_flow_req uninstall_req = { 0 }; + struct npc_mcam_free_entry_req free_req = { 0 }; + struct rvu_switch *rswitch = &rvu->rswitch; + struct rvu_hwinfo *hw = rvu->hw; + int pf, vf, numvfs, hwvf; + struct msg_rsp rsp; + u16 pcifunc; + int err; + + if (!rswitch->used_entries) + return; + + for (pf = 1; pf < hw->total_pfs; pf++) { + if (!is_pf_cgxmapped(rvu, pf)) + continue; + + pcifunc = pf << 10; + err = rvu_switch_install_rx_rule(rvu, pcifunc, 0xFFF); + if (err) + dev_err(rvu->dev, + "Reverting RX rule for PF%d failed(%d)\n", + pf, err); + + for (vf = 0; vf < numvfs; vf++, hwvf++) { + pcifunc = pf << 10 | ((vf + 1) & 0x3FF); + err = rvu_switch_install_rx_rule(rvu, pcifunc, 0xFFF); + if (err) + dev_err(rvu->dev, + "Reverting RX rule for PF%dVF%d failed(%d)\n", + pf, vf, err); + } + } + + uninstall_req.start = rswitch->start_entry; + uninstall_req.end = rswitch->start_entry + rswitch->used_entries - 1; + free_req.all = 1; + rvu_mbox_handler_npc_delete_flow(rvu, &uninstall_req, &rsp); + rvu_mbox_handler_npc_mcam_free_entry(rvu, &free_req, &rsp); + rswitch->used_entries = 0; + kfree(rswitch->entry2pcifunc); +} + +void rvu_switch_update_rules(struct rvu *rvu, u16 pcifunc) +{ + struct rvu_switch *rswitch = &rvu->rswitch; + u32 max = rswitch->used_entries; + u16 entry; + + if (!rswitch->used_entries) + return; + + for (entry = 0; entry < max; entry++) { + if (rswitch->entry2pcifunc[entry] == pcifunc) + break; + } + + if (entry >= max) + return; + + rvu_switch_install_tx_rule(rvu, pcifunc, rswitch->start_entry + entry); + rvu_switch_install_rx_rule(rvu, pcifunc, 0x0); +} From 615c77eb5e870d1ffa95f4001cba3612bd2f2332 Mon Sep 17 00:00:00 2001 From: "Gustavo A. R. Silva" Date: Thu, 15 Jul 2021 00:10:26 -0500 Subject: [PATCH 547/714] powerpc/pasemi: Fix fall-through warning for Clang Fix the following fallthrough warning: arch/powerpc/platforms/pasemi/idle.c:45:2: warning: unannotated fall-through between switch labels [-Wimplicit-fallthrough] Reported-by: kernel test robot Link: https://lore.kernel.org/lkml/60efbf18.d9n6eXv275OJcc7T%25lkp@intel.com/ Signed-off-by: Gustavo A. R. Silva --- arch/powerpc/platforms/pasemi/idle.c | 1 + 1 file changed, 1 insertion(+) diff --git a/arch/powerpc/platforms/pasemi/idle.c b/arch/powerpc/platforms/pasemi/idle.c index 9b88e3cded7d2d..534b0317fc152b 100644 --- a/arch/powerpc/platforms/pasemi/idle.c +++ b/arch/powerpc/platforms/pasemi/idle.c @@ -42,6 +42,7 @@ static int pasemi_system_reset_exception(struct pt_regs *regs) switch (regs->msr & SRR1_WAKEMASK) { case SRR1_WAKEDEC: set_dec(1); + break; case SRR1_WAKEEE: /* * Handle these when interrupts get re-enabled and we take From 8cae8cd89f05f6de223d63e6d15e31c8ba9cf53b Mon Sep 17 00:00:00 2001 From: Eric Sandeen Date: Tue, 13 Jul 2021 17:49:23 +0200 Subject: [PATCH 548/714] seq_file: disallow extremely large seq buffer allocations There is no reasonable need for a buffer larger than this, and it avoids int overflow pitfalls. Fixes: 058504edd026 ("fs/seq_file: fallback to vmalloc allocation") Suggested-by: Al Viro Reported-by: Qualys Security Advisory Signed-off-by: Eric Sandeen Cc: stable@kernel.org Signed-off-by: Linus Torvalds --- fs/seq_file.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/fs/seq_file.c b/fs/seq_file.c index b117b212ef2887..4a2cda04d3e293 100644 --- a/fs/seq_file.c +++ b/fs/seq_file.c @@ -32,6 +32,9 @@ static void seq_set_overflow(struct seq_file *m) static void *seq_buf_alloc(unsigned long size) { + if (unlikely(size > MAX_RW_COUNT)) + return NULL; + return kvmalloc(size, GFP_KERNEL_ACCOUNT); } From 8d4abca95ecc82fc8c41912fa0085281f19cc29f Mon Sep 17 00:00:00 2001 From: "Gustavo A. R. Silva" Date: Mon, 19 Apr 2021 18:43:32 -0500 Subject: [PATCH 549/714] media: ngene: Fix out-of-bounds bug in ngene_command_config_free_buf() Fix an 11-year old bug in ngene_command_config_free_buf() while addressing the following warnings caught with -Warray-bounds: arch/alpha/include/asm/string.h:22:16: warning: '__builtin_memcpy' offset [12, 16] from the object at 'com' is out of the bounds of referenced subobject 'config' with type 'unsigned char' at offset 10 [-Warray-bounds] arch/x86/include/asm/string_32.h:182:25: warning: '__builtin_memcpy' offset [12, 16] from the object at 'com' is out of the bounds of referenced subobject 'config' with type 'unsigned char' at offset 10 [-Warray-bounds] The problem is that the original code is trying to copy 6 bytes of data into a one-byte size member _config_ of the wrong structue FW_CONFIGURE_BUFFERS, in a single call to memcpy(). This causes a legitimate compiler warning because memcpy() overruns the length of &com.cmd.ConfigureBuffers.config. It seems that the right structure is FW_CONFIGURE_FREE_BUFFERS, instead, because it contains 6 more members apart from the header _hdr_. Also, the name of the function ngene_command_config_free_buf() suggests that the actual intention is to ConfigureFreeBuffers, instead of ConfigureBuffers (which takes place in the function ngene_command_config_buf(), above). Fix this by enclosing those 6 members of struct FW_CONFIGURE_FREE_BUFFERS into new struct config, and use &com.cmd.ConfigureFreeBuffers.config as the destination address, instead of &com.cmd.ConfigureBuffers.config, when calling memcpy(). This also helps with the ongoing efforts to globally enable -Warray-bounds and get us closer to being able to tighten the FORTIFY_SOURCE routines on memcpy(). Link: https://github.com/KSPP/linux/issues/109 Fixes: dae52d009fc9 ("V4L/DVB: ngene: Initial check-in") Cc: stable@vger.kernel.org Reported-by: kernel test robot Reviewed-by: Kees Cook Signed-off-by: Gustavo A. R. Silva Link: https://lore.kernel.org/linux-hardening/20210420001631.GA45456@embeddedor/ --- drivers/media/pci/ngene/ngene-core.c | 2 +- drivers/media/pci/ngene/ngene.h | 14 ++++++++------ 2 files changed, 9 insertions(+), 7 deletions(-) diff --git a/drivers/media/pci/ngene/ngene-core.c b/drivers/media/pci/ngene/ngene-core.c index 07f342db6701f1..7481f553f95958 100644 --- a/drivers/media/pci/ngene/ngene-core.c +++ b/drivers/media/pci/ngene/ngene-core.c @@ -385,7 +385,7 @@ static int ngene_command_config_free_buf(struct ngene *dev, u8 *config) com.cmd.hdr.Opcode = CMD_CONFIGURE_FREE_BUFFER; com.cmd.hdr.Length = 6; - memcpy(&com.cmd.ConfigureBuffers.config, config, 6); + memcpy(&com.cmd.ConfigureFreeBuffers.config, config, 6); com.in_len = 6; com.out_len = 0; diff --git a/drivers/media/pci/ngene/ngene.h b/drivers/media/pci/ngene/ngene.h index 84f04e0e0cb9a1..3d296f1998a1a3 100644 --- a/drivers/media/pci/ngene/ngene.h +++ b/drivers/media/pci/ngene/ngene.h @@ -407,12 +407,14 @@ enum _BUFFER_CONFIGS { struct FW_CONFIGURE_FREE_BUFFERS { struct FW_HEADER hdr; - u8 UVI1_BufferLength; - u8 UVI2_BufferLength; - u8 TVO_BufferLength; - u8 AUD1_BufferLength; - u8 AUD2_BufferLength; - u8 TVA_BufferLength; + struct { + u8 UVI1_BufferLength; + u8 UVI2_BufferLength; + u8 TVO_BufferLength; + u8 AUD1_BufferLength; + u8 AUD2_BufferLength; + u8 TVA_BufferLength; + } __packed config; } __attribute__ ((__packed__)); struct FW_CONFIGURE_UART { From fae21608c31ca987d9dfc0422ac9b5bd21c213a6 Mon Sep 17 00:00:00 2001 From: Sreekanth Reddy Date: Mon, 5 Jul 2021 20:29:50 +0530 Subject: [PATCH 550/714] scsi: mpt3sas: Transition IOC to Ready state during shutdown The IOC firmware assumes that the host driver is still alive after shutdown and continues to post events to host memory (due to faulty expander phy links, etc). This leads to 0x2666 (a bus fault occurred during a host-IOC memory access). Perform an IOC soft reset as part of shutdown to disable event posting. Link: https://lore.kernel.org/r/20210705145951.32258-1-sreekanth.reddy@broadcom.com Signed-off-by: Sreekanth Reddy Signed-off-by: Martin K. Petersen --- drivers/scsi/mpt3sas/mpt3sas_base.c | 32 ++++++++++++++-------------- drivers/scsi/mpt3sas/mpt3sas_base.h | 4 ++++ drivers/scsi/mpt3sas/mpt3sas_scsih.c | 7 +++++- 3 files changed, 26 insertions(+), 17 deletions(-) diff --git a/drivers/scsi/mpt3sas/mpt3sas_base.c b/drivers/scsi/mpt3sas/mpt3sas_base.c index c39955239d1ca0..19b1c0cf5f2a2b 100644 --- a/drivers/scsi/mpt3sas/mpt3sas_base.c +++ b/drivers/scsi/mpt3sas/mpt3sas_base.c @@ -2983,13 +2983,13 @@ _base_check_enable_msix(struct MPT3SAS_ADAPTER *ioc) } /** - * _base_free_irq - free irq + * mpt3sas_base_free_irq - free irq * @ioc: per adapter object * * Freeing respective reply_queue from the list. */ -static void -_base_free_irq(struct MPT3SAS_ADAPTER *ioc) +void +mpt3sas_base_free_irq(struct MPT3SAS_ADAPTER *ioc) { struct adapter_reply_queue *reply_q, *next; @@ -3191,12 +3191,12 @@ _base_check_and_enable_high_iops_queues(struct MPT3SAS_ADAPTER *ioc, } /** - * _base_disable_msix - disables msix + * mpt3sas_base_disable_msix - disables msix * @ioc: per adapter object * */ -static void -_base_disable_msix(struct MPT3SAS_ADAPTER *ioc) +void +mpt3sas_base_disable_msix(struct MPT3SAS_ADAPTER *ioc) { if (!ioc->msix_enable) return; @@ -3304,8 +3304,8 @@ _base_enable_msix(struct MPT3SAS_ADAPTER *ioc) for (i = 0; i < ioc->reply_queue_count; i++) { r = _base_request_irq(ioc, i); if (r) { - _base_free_irq(ioc); - _base_disable_msix(ioc); + mpt3sas_base_free_irq(ioc); + mpt3sas_base_disable_msix(ioc); goto try_ioapic; } } @@ -3342,8 +3342,8 @@ mpt3sas_base_unmap_resources(struct MPT3SAS_ADAPTER *ioc) dexitprintk(ioc, ioc_info(ioc, "%s\n", __func__)); - _base_free_irq(ioc); - _base_disable_msix(ioc); + mpt3sas_base_free_irq(ioc); + mpt3sas_base_disable_msix(ioc); kfree(ioc->replyPostRegisterIndex); ioc->replyPostRegisterIndex = NULL; @@ -7613,14 +7613,14 @@ _base_diag_reset(struct MPT3SAS_ADAPTER *ioc) } /** - * _base_make_ioc_ready - put controller in READY state + * mpt3sas_base_make_ioc_ready - put controller in READY state * @ioc: per adapter object * @type: FORCE_BIG_HAMMER or SOFT_RESET * * Return: 0 for success, non-zero for failure. */ -static int -_base_make_ioc_ready(struct MPT3SAS_ADAPTER *ioc, enum reset_type type) +int +mpt3sas_base_make_ioc_ready(struct MPT3SAS_ADAPTER *ioc, enum reset_type type) { u32 ioc_state; int rc; @@ -7897,7 +7897,7 @@ mpt3sas_base_free_resources(struct MPT3SAS_ADAPTER *ioc) if (ioc->chip_phys && ioc->chip) { mpt3sas_base_mask_interrupts(ioc); ioc->shost_recovery = 1; - _base_make_ioc_ready(ioc, SOFT_RESET); + mpt3sas_base_make_ioc_ready(ioc, SOFT_RESET); ioc->shost_recovery = 0; } @@ -8017,7 +8017,7 @@ mpt3sas_base_attach(struct MPT3SAS_ADAPTER *ioc) ioc->build_sg_mpi = &_base_build_sg; ioc->build_zero_len_sge_mpi = &_base_build_zero_len_sge; - r = _base_make_ioc_ready(ioc, SOFT_RESET); + r = mpt3sas_base_make_ioc_ready(ioc, SOFT_RESET); if (r) goto out_free_resources; @@ -8471,7 +8471,7 @@ mpt3sas_base_hard_reset_handler(struct MPT3SAS_ADAPTER *ioc, _base_pre_reset_handler(ioc); mpt3sas_wait_for_commands_to_complete(ioc); mpt3sas_base_mask_interrupts(ioc); - r = _base_make_ioc_ready(ioc, type); + r = mpt3sas_base_make_ioc_ready(ioc, type); if (r) goto out; _base_clear_outstanding_commands(ioc); diff --git a/drivers/scsi/mpt3sas/mpt3sas_base.h b/drivers/scsi/mpt3sas/mpt3sas_base.h index d4834c8ee9c0d3..0c6c3df0038d52 100644 --- a/drivers/scsi/mpt3sas/mpt3sas_base.h +++ b/drivers/scsi/mpt3sas/mpt3sas_base.h @@ -1730,6 +1730,10 @@ do { ioc_err(ioc, "In func: %s\n", __func__); \ status, mpi_request, sz); } while (0) int mpt3sas_wait_for_ioc(struct MPT3SAS_ADAPTER *ioc, int wait_count); +int +mpt3sas_base_make_ioc_ready(struct MPT3SAS_ADAPTER *ioc, enum reset_type type); +void mpt3sas_base_free_irq(struct MPT3SAS_ADAPTER *ioc); +void mpt3sas_base_disable_msix(struct MPT3SAS_ADAPTER *ioc); /* scsih shared API */ struct scsi_cmnd *mpt3sas_scsih_scsi_lookup_get(struct MPT3SAS_ADAPTER *ioc, diff --git a/drivers/scsi/mpt3sas/mpt3sas_scsih.c b/drivers/scsi/mpt3sas/mpt3sas_scsih.c index 866d118f793148..8e64a6f1454296 100644 --- a/drivers/scsi/mpt3sas/mpt3sas_scsih.c +++ b/drivers/scsi/mpt3sas/mpt3sas_scsih.c @@ -11295,7 +11295,12 @@ scsih_shutdown(struct pci_dev *pdev) _scsih_ir_shutdown(ioc); _scsih_nvme_shutdown(ioc); - mpt3sas_base_detach(ioc); + mpt3sas_base_mask_interrupts(ioc); + ioc->shost_recovery = 1; + mpt3sas_base_make_ioc_ready(ioc, SOFT_RESET); + ioc->shost_recovery = 0; + mpt3sas_base_free_irq(ioc); + mpt3sas_base_disable_msix(ioc); } From 114613f62f42e7cbc1242c4e82076a0153043761 Mon Sep 17 00:00:00 2001 From: Pierre-Louis Bossart Date: Mon, 19 Jul 2021 18:17:46 -0500 Subject: [PATCH 551/714] ALSA: hda: intel-dsp-cfg: add missing ElkhartLake PCI ID We missed the fact that ElkhartLake platforms have two different PCI IDs. We only added one so the SOF driver is never selected by the autodetection logic for the missing configuration. BugLink: https://github.com/thesofproject/linux/issues/2990 Fixes: cc8f81c7e625 ('ALSA: hda: fix intel DSP config') Signed-off-by: Pierre-Louis Bossart Link: https://lore.kernel.org/r/20210719231746.557325-1-pierre-louis.bossart@linux.intel.com Signed-off-by: Takashi Iwai --- sound/hda/intel-dsp-config.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/sound/hda/intel-dsp-config.c b/sound/hda/intel-dsp-config.c index d8be146793eee2..c9d0ba353463bd 100644 --- a/sound/hda/intel-dsp-config.c +++ b/sound/hda/intel-dsp-config.c @@ -319,6 +319,10 @@ static const struct config_entry config_table[] = { .flags = FLAG_SOF | FLAG_SOF_ONLY_IF_DMIC, .device = 0x4b55, }, + { + .flags = FLAG_SOF | FLAG_SOF_ONLY_IF_DMIC, + .device = 0x4b58, + }, #endif /* Alder Lake */ From e9db418d4b828dd049caaf5ed65dc86f93bb1a0c Mon Sep 17 00:00:00 2001 From: Ian Ray Date: Mon, 19 Jul 2021 18:43:49 +0200 Subject: [PATCH 552/714] USB: serial: cp210x: fix comments for GE CS1000 Fix comments for GE CS1000 CP210x USB ID assignments. Fixes: 42213a0190b5 ("USB: serial: cp210x: add some more GE USB IDs") Signed-off-by: Ian Ray Signed-off-by: Sebastian Reichel Cc: stable@vger.kernel.org Signed-off-by: Johan Hovold --- drivers/usb/serial/cp210x.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/usb/serial/cp210x.c b/drivers/usb/serial/cp210x.c index 09b845d0da41e3..af286240807e42 100644 --- a/drivers/usb/serial/cp210x.c +++ b/drivers/usb/serial/cp210x.c @@ -202,8 +202,8 @@ static const struct usb_device_id id_table[] = { { USB_DEVICE(0x1901, 0x0194) }, /* GE Healthcare Remote Alarm Box */ { USB_DEVICE(0x1901, 0x0195) }, /* GE B850/B650/B450 CP2104 DP UART interface */ { USB_DEVICE(0x1901, 0x0196) }, /* GE B850 CP2105 DP UART interface */ - { USB_DEVICE(0x1901, 0x0197) }, /* GE CS1000 Display serial interface */ - { USB_DEVICE(0x1901, 0x0198) }, /* GE CS1000 M.2 Key E serial interface */ + { USB_DEVICE(0x1901, 0x0197) }, /* GE CS1000 M.2 Key E serial interface */ + { USB_DEVICE(0x1901, 0x0198) }, /* GE CS1000 Display serial interface */ { USB_DEVICE(0x199B, 0xBA30) }, /* LORD WSDA-200-USB */ { USB_DEVICE(0x19CF, 0x3000) }, /* Parrot NMEA GPS Flight Recorder */ { USB_DEVICE(0x1ADB, 0x0001) }, /* Schweitzer Engineering C662 Cable */ From 47e1e233e9d822dfda068383fb9a616451bda703 Mon Sep 17 00:00:00 2001 From: Borislav Petkov Date: Tue, 20 Jul 2021 09:28:09 +0200 Subject: [PATCH 553/714] efi/mokvar: Reserve the table only if it is in boot services data One of the SUSE QA tests triggered: localhost kernel: efi: Failed to lookup EFI memory descriptor for 0x000000003dcf8000 which comes from x86's version of efi_arch_mem_reserve() trying to reserve a memory region. Usually, that function expects EFI_BOOT_SERVICES_DATA memory descriptors but the above case is for the MOKvar table which is allocated in the EFI shim as runtime services. That lead to a fix changing the allocation of that table to boot services. However, that fix broke booting SEV guests with that shim leading to this kernel fix 8d651ee9c71b ("x86/ioremap: Map EFI-reserved memory as encrypted for SEV") which extended the ioremap hint to map reserved EFI boot services as decrypted too. However, all that wasn't needed, IMO, because that error message in efi_arch_mem_reserve() was innocuous in this case - if the MOKvar table is not in boot services, then it doesn't need to be reserved in the first place because it is, well, in runtime services which *should* be reserved anyway. So do that reservation for the MOKvar table only if it is allocated in boot services data. I couldn't find any requirement about where that table should be allocated in, unlike the ESRT which allocation is mandated to be done in boot services data by the UEFI spec. Signed-off-by: Borislav Petkov Signed-off-by: Ard Biesheuvel --- drivers/firmware/efi/mokvar-table.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/drivers/firmware/efi/mokvar-table.c b/drivers/firmware/efi/mokvar-table.c index d8bc013406861c..38722d2009e206 100644 --- a/drivers/firmware/efi/mokvar-table.c +++ b/drivers/firmware/efi/mokvar-table.c @@ -180,7 +180,10 @@ void __init efi_mokvar_table_init(void) pr_err("EFI MOKvar config table is not valid\n"); return; } - efi_mem_reserve(efi.mokvar_table, map_size_needed); + + if (md.type == EFI_BOOT_SERVICES_DATA) + efi_mem_reserve(efi.mokvar_table, map_size_needed); + efi_mokvar_table_size = map_size_needed; } From c4824ae7db418aee6f50f308a20b832e58e997fd Mon Sep 17 00:00:00 2001 From: Takashi Iwai Date: Tue, 20 Jul 2021 11:26:40 +0200 Subject: [PATCH 554/714] ALSA: pcm: Fix mmap capability check The hw_support_mmap() doesn't cover all memory allocation types and might use a wrong device pointer for checking the capability. Check the all memory allocation types more completely. Cc: Link: https://lore.kernel.org/r/20210720092640.12338-1-tiwai@suse.de Signed-off-by: Takashi Iwai --- sound/core/pcm_native.c | 14 ++++++++++---- 1 file changed, 10 insertions(+), 4 deletions(-) diff --git a/sound/core/pcm_native.c b/sound/core/pcm_native.c index c88c4316c417d9..6919d2943b9d3a 100644 --- a/sound/core/pcm_native.c +++ b/sound/core/pcm_native.c @@ -246,12 +246,18 @@ static bool hw_support_mmap(struct snd_pcm_substream *substream) if (!(substream->runtime->hw.info & SNDRV_PCM_INFO_MMAP)) return false; - if (substream->ops->mmap || - (substream->dma_buffer.dev.type != SNDRV_DMA_TYPE_DEV && - substream->dma_buffer.dev.type != SNDRV_DMA_TYPE_DEV_UC)) + if (substream->ops->mmap) return true; - return dma_can_mmap(substream->dma_buffer.dev.dev); + switch (substream->dma_buffer.dev.type) { + case SNDRV_DMA_TYPE_UNKNOWN: + return false; + case SNDRV_DMA_TYPE_CONTINUOUS: + case SNDRV_DMA_TYPE_VMALLOC: + return true; + default: + return dma_can_mmap(substream->dma_buffer.dev.dev); + } } static int constrain_mask_params(struct snd_pcm_substream *substream, From d371588910715ebf7fa8e3a5d21ea5169c852927 Mon Sep 17 00:00:00 2001 From: Takashi Iwai Date: Tue, 20 Jul 2021 11:27:32 +0200 Subject: [PATCH 555/714] ALSA: pcm: Fix mmap without buffer preallocation The recent rewrite of the memory allocation helpers also changed the page extraction to a common helper, snd_sgbuf_get_page(). But this assumes implicitly that the buffer was allocated via the standard helper (usually via preallocation), and didn't consider the case of the manual buffer handling. This patch fixes it and also covers the manual buffer management. Fixes: 37af81c5998f ("ALSA: core: Abstract memory alloc helpers") Link: https://lore.kernel.org/r/20210720092732.12412-1-tiwai@suse.de Signed-off-by: Takashi Iwai --- sound/core/pcm_native.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/sound/core/pcm_native.c b/sound/core/pcm_native.c index 6919d2943b9d3a..6a2971a7e6a1c6 100644 --- a/sound/core/pcm_native.c +++ b/sound/core/pcm_native.c @@ -3675,6 +3675,8 @@ static vm_fault_t snd_pcm_mmap_data_fault(struct vm_fault *vmf) return VM_FAULT_SIGBUS; if (substream->ops->page) page = substream->ops->page(substream, offset); + else if (!snd_pcm_get_dma_buf(substream)) + page = virt_to_page(runtime->dma_area + offset); else page = snd_sgbuf_get_page(snd_pcm_get_dma_buf(substream), offset); if (!page) From 2d85a1b31dde84038ea07ad825c3d8d3e71f4344 Mon Sep 17 00:00:00 2001 From: Vasily Averin Date: Mon, 19 Jul 2021 10:55:14 +0300 Subject: [PATCH 556/714] ipv6: ip6_finish_output2: set sk into newly allocated nskb skb_set_owner_w() should set sk not to old skb but to new nskb. Fixes: 5796015fa968 ("ipv6: allocate enough headroom in ip6_finish_output2()") Signed-off-by: Vasily Averin Link: https://lore.kernel.org/r/70c0744f-89ae-1869-7e3e-4fa292158f4b@virtuozzo.com Signed-off-by: Jakub Kicinski --- net/ipv6/ip6_output.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/ipv6/ip6_output.c b/net/ipv6/ip6_output.c index 01bea76e3891c2..e1b9f7ac8bade5 100644 --- a/net/ipv6/ip6_output.c +++ b/net/ipv6/ip6_output.c @@ -74,7 +74,7 @@ static int ip6_finish_output2(struct net *net, struct sock *sk, struct sk_buff * if (likely(nskb)) { if (skb->sk) - skb_set_owner_w(skb, skb->sk); + skb_set_owner_w(nskb, skb->sk); consume_skb(skb); } else { kfree_skb(skb); From 749468760b952e555529ca8a71256b991455101e Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Mon, 19 Jul 2021 02:20:28 -0700 Subject: [PATCH 557/714] net/tcp_fastopen: remove obsolete extern After cited commit, sysctl_tcp_fastopen_blackhole_timeout is no longer a global variable. Fixes: 3733be14a32b ("ipv4: Namespaceify tcp_fastopen_blackhole_timeout knob") Signed-off-by: Eric Dumazet Cc: Haishuang Yan Cc: Yuchung Cheng Cc: Neal Cardwell Acked-by: Wei Wang Link: https://lore.kernel.org/r/20210719092028.3016745-1-eric.dumazet@gmail.com Signed-off-by: Jakub Kicinski --- include/net/tcp.h | 1 - 1 file changed, 1 deletion(-) diff --git a/include/net/tcp.h b/include/net/tcp.h index 17df9b047ee46d..784d5c3ef1c5be 100644 --- a/include/net/tcp.h +++ b/include/net/tcp.h @@ -1709,7 +1709,6 @@ struct tcp_fastopen_context { struct rcu_head rcu; }; -extern unsigned int sysctl_tcp_fastopen_blackhole_timeout; void tcp_fastopen_active_disable(struct sock *sk); bool tcp_fastopen_active_should_disable(struct sock *sk); void tcp_fastopen_active_disable_ofo_check(struct sock *sk); From 6c2d125823ae89d10293437c6fb8a2f3406d721a Mon Sep 17 00:00:00 2001 From: Landen Chao Date: Tue, 20 Jul 2021 11:50:07 +0800 Subject: [PATCH 558/714] net: Update MAINTAINERS for MediaTek switch driver Update maintainers for MediaTek switch driver with Deng Qingfang who has contributed many high-quality patches (interrupt, VLAN, GPIO, and etc.) and will help maintenance. Signed-off-by: Landen Chao Signed-off-by: DENG Qingfang Reviewed-by: Florian Fainelli Acked-by: Vladimir Oltean Link: https://lore.kernel.org/r/49e1aa8aac58dcbf1b5e036d09b3fa3bbb1d94d0.1626751861.git.landen.chao@mediatek.com Signed-off-by: Jakub Kicinski --- MAINTAINERS | 1 + 1 file changed, 1 insertion(+) diff --git a/MAINTAINERS b/MAINTAINERS index 5779f6cacff7c0..e5ec539b15ed75 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -11757,6 +11757,7 @@ F: drivers/char/hw_random/mtk-rng.c MEDIATEK SWITCH DRIVER M: Sean Wang M: Landen Chao +M: DENG Qingfang L: netdev@vger.kernel.org S: Maintained F: drivers/net/dsa/mt7530.* From cbb56b03ec3f317e3728d0f68d25d4b9e590cdc9 Mon Sep 17 00:00:00 2001 From: Vladimir Oltean Date: Mon, 19 Jul 2021 12:39:16 +0300 Subject: [PATCH 559/714] net: bridge: do not replay fdb entries pointing towards the bridge twice This simple script: ip link add br0 type bridge ip link set swp2 master br0 ip link set br0 address 00:01:02:03:04:05 ip link del br0 produces this result on a DSA switch: [ 421.306399] br0: port 1(swp2) entered blocking state [ 421.311445] br0: port 1(swp2) entered disabled state [ 421.472553] device swp2 entered promiscuous mode [ 421.488986] device swp2 left promiscuous mode [ 421.493508] br0: port 1(swp2) entered disabled state [ 421.886107] sja1105 spi0.1: port 1 failed to delete 00:01:02:03:04:05 vid 1 from fdb: -ENOENT [ 421.894374] sja1105 spi0.1: port 1 failed to delete 00:01:02:03:04:05 vid 0 from fdb: -ENOENT [ 421.943982] br0: port 1(swp2) entered blocking state [ 421.949030] br0: port 1(swp2) entered disabled state [ 422.112504] device swp2 entered promiscuous mode A very simplified view of what happens is: (1) the bridge port is created, and the bridge device inherits its MAC address (2) when joining, the bridge port (DSA) requests a replay of the addition of all FDB entries towards this bridge port and towards the bridge device itself. In fact, DSA calls br_fdb_replay() twice: br_fdb_replay(br, brport_dev); br_fdb_replay(br, br); DSA uses reference counting for the FDB entries. So the MAC address of the bridge is simply kept with refcount 2. When the bridge port leaves under normal circumstances, everything cancels out since the replay of the FDB entry deletion is also done twice per VLAN. (3) when the bridge MAC address changes, switchdev is notified of the deletion of the old address and of the insertion of the new one. But the old address does not really go away, since it had refcount 2, and the new address is added "only" with refcount 1. (4) when the bridge port leaves now, it will replay a deletion of the FDB entries pointing towards the bridge twice. Then DSA will complain that it can't delete something that no longer exists. It is clear that the problem is that the FDB entries towards the bridge are replayed too many times, so let's fix that problem. Fixes: 63c51453c82c ("net: dsa: replay the local bridge FDB entries pointing to the bridge dev too") Signed-off-by: Vladimir Oltean Link: https://lore.kernel.org/r/20210719093916.4099032-1-vladimir.oltean@nxp.com Signed-off-by: Jakub Kicinski --- net/bridge/br_fdb.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/bridge/br_fdb.c b/net/bridge/br_fdb.c index 2b862cffc03a08..a16191dcaed199 100644 --- a/net/bridge/br_fdb.c +++ b/net/bridge/br_fdb.c @@ -780,7 +780,7 @@ int br_fdb_replay(const struct net_device *br_dev, const struct net_device *dev, struct net_device *dst_dev; dst_dev = dst ? dst->dev : br->dev; - if (dst_dev != br_dev && dst_dev != dev) + if (dst_dev && dst_dev != dev) continue; err = br_fdb_replay_one(nb, fdb, dst_dev, action, ctx); From 1b713d14dc3c077ec45e65dab4ea01a8bc41b8c1 Mon Sep 17 00:00:00 2001 From: Chengwen Feng Date: Mon, 19 Jul 2021 17:13:05 +0800 Subject: [PATCH 560/714] net: hns3: fix possible mismatches resp of mailbox Currently, the mailbox synchronous communication between VF and PF use the following fields to maintain communication: 1. Origin_mbx_msg which was combined by message code and subcode, used to match request and response. 2. Received_resp which means whether received response. There may possible mismatches of the following situation: 1. VF sends message A with code=1 subcode=1. 2. PF was blocked about 500ms when processing the message A. 3. VF will detect message A timeout because it can't get the response within 500ms. 4. VF sends message B with code=1 subcode=1 which equal message A. 5. PF processes the first message A and send the response message to VF. 6. VF will identify the response matched the message B because the code/subcode is the same. This will lead to mismatch of request and response. To fix the above bug, we use the following scheme: 1. The message sent from VF was labelled with match_id which was a unique 16-bit non-zero value. 2. The response sent from PF will label with match_id which got from the request. 3. The VF uses the match_id to match request and response message. As for PF driver, it only needs to copy the match_id from request to response. Fixes: dde1a86e93ca ("net: hns3: Add mailbox support to PF driver") Signed-off-by: Chengwen Feng Signed-off-by: Guangbin Huang Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/hisilicon/hns3/hclge_mbx.h | 6 ++++-- drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_mbx.c | 1 + 2 files changed, 5 insertions(+), 2 deletions(-) diff --git a/drivers/net/ethernet/hisilicon/hns3/hclge_mbx.h b/drivers/net/ethernet/hisilicon/hns3/hclge_mbx.h index 0a6cda309b24a4..56b573e470725b 100644 --- a/drivers/net/ethernet/hisilicon/hns3/hclge_mbx.h +++ b/drivers/net/ethernet/hisilicon/hns3/hclge_mbx.h @@ -143,7 +143,8 @@ struct hclge_mbx_vf_to_pf_cmd { u8 mbx_need_resp; u8 rsv1[1]; u8 msg_len; - u8 rsv2[3]; + u8 rsv2; + u16 match_id; struct hclge_vf_to_pf_msg msg; }; @@ -153,7 +154,8 @@ struct hclge_mbx_pf_to_vf_cmd { u8 dest_vfid; u8 rsv[3]; u8 msg_len; - u8 rsv1[3]; + u8 rsv1; + u16 match_id; struct hclge_pf_to_vf_msg msg; }; diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_mbx.c b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_mbx.c index e10a2c36b70662..c0a478ae958347 100644 --- a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_mbx.c +++ b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_mbx.c @@ -47,6 +47,7 @@ static int hclge_gen_resp_to_vf(struct hclge_vport *vport, resp_pf_to_vf->dest_vfid = vf_to_pf_req->mbx_src_vfid; resp_pf_to_vf->msg_len = vf_to_pf_req->msg_len; + resp_pf_to_vf->match_id = vf_to_pf_req->match_id; resp_pf_to_vf->msg.code = HCLGE_MBX_PF_VF_RESP; resp_pf_to_vf->msg.vf_mbx_msg_code = vf_to_pf_req->msg.code; From 4671042f1ef0d37137884811afcc4ae67685ce07 Mon Sep 17 00:00:00 2001 From: Peng Li Date: Mon, 19 Jul 2021 17:13:06 +0800 Subject: [PATCH 561/714] net: hns3: add match_id to check mailbox response from PF to VF When VF need response from PF, VF will wait (1us - 1s) to receive the response, or it will wait timeout and the VF action fails. If VF do not receive response in 1st action because timeout, the 2nd action may receive response for the 1st action, and get incorrect response data.VF must reciveve the right response from PF,or it will cause unexpected error. This patch adds match_id to check mailbox response from PF to VF, to make sure VF get the right response: 1. The message sent from VF was labelled with match_id which was a unique 16-bit non-zero value. 2. The response sent from PF will label with match_id which got from the request. 3. The VF uses the match_id to match request and response message. This scheme depends on PF driver supports match_id, if PF driver doesn't support then VF will uses the original scheme. Signed-off-by: Peng Li Signed-off-by: Guangbin Huang Signed-off-by: Jakub Kicinski --- .../net/ethernet/hisilicon/hns3/hclge_mbx.h | 1 + .../hisilicon/hns3/hns3vf/hclgevf_mbx.c | 19 +++++++++++++++++++ 2 files changed, 20 insertions(+) diff --git a/drivers/net/ethernet/hisilicon/hns3/hclge_mbx.h b/drivers/net/ethernet/hisilicon/hns3/hclge_mbx.h index 56b573e470725b..aa86a81c8f4af1 100644 --- a/drivers/net/ethernet/hisilicon/hns3/hclge_mbx.h +++ b/drivers/net/ethernet/hisilicon/hns3/hclge_mbx.h @@ -98,6 +98,7 @@ struct hclgevf_mbx_resp_status { u32 origin_mbx_msg; bool received_resp; int resp_status; + u16 match_id; u8 additional_info[HCLGE_MBX_MAX_RESP_DATA_SIZE]; }; diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3vf/hclgevf_mbx.c b/drivers/net/ethernet/hisilicon/hns3/hns3vf/hclgevf_mbx.c index 9b17735b9f4ce3..772b2f8acd2e83 100644 --- a/drivers/net/ethernet/hisilicon/hns3/hns3vf/hclgevf_mbx.c +++ b/drivers/net/ethernet/hisilicon/hns3/hns3vf/hclgevf_mbx.c @@ -13,6 +13,7 @@ static int hclgevf_resp_to_errno(u16 resp_code) return resp_code ? -resp_code : 0; } +#define HCLGEVF_MBX_MATCH_ID_START 1 static void hclgevf_reset_mbx_resp_status(struct hclgevf_dev *hdev) { /* this function should be called with mbx_resp.mbx_mutex held @@ -21,6 +22,10 @@ static void hclgevf_reset_mbx_resp_status(struct hclgevf_dev *hdev) hdev->mbx_resp.received_resp = false; hdev->mbx_resp.origin_mbx_msg = 0; hdev->mbx_resp.resp_status = 0; + hdev->mbx_resp.match_id++; + /* Update match_id and ensure the value of match_id is not zero */ + if (hdev->mbx_resp.match_id == 0) + hdev->mbx_resp.match_id = HCLGEVF_MBX_MATCH_ID_START; memset(hdev->mbx_resp.additional_info, 0, HCLGE_MBX_MAX_RESP_DATA_SIZE); } @@ -115,6 +120,7 @@ int hclgevf_send_mbx_msg(struct hclgevf_dev *hdev, if (need_resp) { mutex_lock(&hdev->mbx_resp.mbx_mutex); hclgevf_reset_mbx_resp_status(hdev); + req->match_id = hdev->mbx_resp.match_id; status = hclgevf_cmd_send(&hdev->hw, &desc, 1); if (status) { dev_err(&hdev->pdev->dev, @@ -211,6 +217,19 @@ void hclgevf_mbx_handler(struct hclgevf_dev *hdev) resp->additional_info[i] = *temp; temp++; } + + /* If match_id is not zero, it means PF support + * match_id. If the match_id is right, VF get the + * right response, otherwise ignore the response. + * Driver will clear hdev->mbx_resp when send + * next message which need response. + */ + if (req->match_id) { + if (req->match_id == resp->match_id) + resp->received_resp = true; + } else { + resp->received_resp = true; + } break; case HCLGE_MBX_LINK_STAT_CHANGE: case HCLGE_MBX_ASSERTING_RESET: From 184cd221a86321e53df9389c4b35a247b60c1e77 Mon Sep 17 00:00:00 2001 From: Jian Shen Date: Mon, 19 Jul 2021 17:13:07 +0800 Subject: [PATCH 562/714] net: hns3: disable port VLAN filter when support function level VLAN filter control For hardware limitation, port VLAN filter is port level, and effective for all the functions of the port. So if not support port VLAN bypass, it's necessary to disable the port VLAN filter, in order to support function level VLAN filter control. Fixes: 2ba306627f59 ("net: hns3: add support for modify VLAN filter state") Signed-off-by: Jian Shen Signed-off-by: Guangbin Huang Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c index dd3354a57c6206..ebeaf12e409bcf 100644 --- a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c +++ b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c @@ -9552,13 +9552,17 @@ static int hclge_set_vport_vlan_filter(struct hclge_vport *vport, bool enable) if (ret) return ret; - if (test_bit(HNAE3_DEV_SUPPORT_PORT_VLAN_BYPASS_B, ae_dev->caps)) + if (test_bit(HNAE3_DEV_SUPPORT_PORT_VLAN_BYPASS_B, ae_dev->caps)) { ret = hclge_set_port_vlan_filter_bypass(hdev, vport->vport_id, !enable); - else if (!vport->vport_id) + } else if (!vport->vport_id) { + if (test_bit(HNAE3_DEV_SUPPORT_VLAN_FLTR_MDF_B, ae_dev->caps)) + enable = false; + ret = hclge_set_vlan_filter_ctrl(hdev, HCLGE_FILTER_TYPE_PORT, HCLGE_FILTER_FE_INGRESS, enable, 0); + } return ret; } From bbfd4506f962e7e6fff8f37f017154a3c3791264 Mon Sep 17 00:00:00 2001 From: Jian Shen Date: Mon, 19 Jul 2021 17:13:08 +0800 Subject: [PATCH 563/714] net: hns3: fix rx VLAN offload state inconsistent issue Currently, VF doesn't enable rx VLAN offload when initializating, and PF does it for VFs. If user disable the rx VLAN offload for VF with ethtool -K, and reload the VF driver, it may cause the rx VLAN offload state being inconsistent between hardware and software. Fixes it by enabling rx VLAN offload when VF initializing. Fixes: e2cb1dec9779 ("net: hns3: Add HNS3 VF HCL(Hardware Compatibility Layer) Support") Signed-off-by: Jian Shen Signed-off-by: Guangbin Huang Signed-off-by: Jakub Kicinski --- .../net/ethernet/hisilicon/hns3/hns3vf/hclgevf_main.c | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3vf/hclgevf_main.c b/drivers/net/ethernet/hisilicon/hns3/hns3vf/hclgevf_main.c index 52eaf82b7cd71d..8784d61e833f15 100644 --- a/drivers/net/ethernet/hisilicon/hns3/hns3vf/hclgevf_main.c +++ b/drivers/net/ethernet/hisilicon/hns3/hns3vf/hclgevf_main.c @@ -2641,6 +2641,16 @@ static int hclgevf_rss_init_hw(struct hclgevf_dev *hdev) static int hclgevf_init_vlan_config(struct hclgevf_dev *hdev) { + struct hnae3_handle *nic = &hdev->nic; + int ret; + + ret = hclgevf_en_hw_strip_rxvtag(nic, true); + if (ret) { + dev_err(&hdev->pdev->dev, + "failed to enable rx vlan offload, ret = %d\n", ret); + return ret; + } + return hclgevf_set_vlan_filter(&hdev->nic, htons(ETH_P_8021Q), 0, false); } From c45c1e82bba130db4f19d9dbc1deefcf4ea994ed Mon Sep 17 00:00:00 2001 From: Alexandru Tachici Date: Sat, 17 Jul 2021 00:02:45 +0300 Subject: [PATCH 564/714] spi: spi-bcm2835: Fix deadlock The bcm2835_spi_transfer_one function can create a deadlock if it is called while another thread already has the CCF lock. Signed-off-by: Alexandru Tachici Fixes: f8043872e796 ("spi: add driver for BCM2835") Reviewed-by: Florian Fainelli Link: https://lore.kernel.org/r/20210716210245.13240-2-alexandru.tachici@analog.com Signed-off-by: Mark Brown --- drivers/spi/spi-bcm2835.c | 12 +++++++----- 1 file changed, 7 insertions(+), 5 deletions(-) diff --git a/drivers/spi/spi-bcm2835.c b/drivers/spi/spi-bcm2835.c index 5f8771fe1a31d0..775c0bf2f923d4 100644 --- a/drivers/spi/spi-bcm2835.c +++ b/drivers/spi/spi-bcm2835.c @@ -83,6 +83,7 @@ MODULE_PARM_DESC(polling_limit_us, * struct bcm2835_spi - BCM2835 SPI controller * @regs: base address of register map * @clk: core clock, divided to calculate serial clock + * @clk_hz: core clock cached speed * @irq: interrupt, signals TX FIFO empty or RX FIFO ¾ full * @tfr: SPI transfer currently processed * @ctlr: SPI controller reverse lookup @@ -116,6 +117,7 @@ MODULE_PARM_DESC(polling_limit_us, struct bcm2835_spi { void __iomem *regs; struct clk *clk; + unsigned long clk_hz; int irq; struct spi_transfer *tfr; struct spi_controller *ctlr; @@ -1045,19 +1047,18 @@ static int bcm2835_spi_transfer_one(struct spi_controller *ctlr, { struct bcm2835_spi *bs = spi_controller_get_devdata(ctlr); struct bcm2835_spidev *slv = spi_get_ctldata(spi); - unsigned long spi_hz, clk_hz, cdiv; + unsigned long spi_hz, cdiv; unsigned long hz_per_byte, byte_limit; u32 cs = slv->prepare_cs; /* set clock */ spi_hz = tfr->speed_hz; - clk_hz = clk_get_rate(bs->clk); - if (spi_hz >= clk_hz / 2) { + if (spi_hz >= bs->clk_hz / 2) { cdiv = 2; /* clk_hz/2 is the fastest we can go */ } else if (spi_hz) { /* CDIV must be a multiple of two */ - cdiv = DIV_ROUND_UP(clk_hz, spi_hz); + cdiv = DIV_ROUND_UP(bs->clk_hz, spi_hz); cdiv += (cdiv % 2); if (cdiv >= 65536) @@ -1065,7 +1066,7 @@ static int bcm2835_spi_transfer_one(struct spi_controller *ctlr, } else { cdiv = 0; /* 0 is the slowest we can go */ } - tfr->effective_speed_hz = cdiv ? (clk_hz / cdiv) : (clk_hz / 65536); + tfr->effective_speed_hz = cdiv ? (bs->clk_hz / cdiv) : (bs->clk_hz / 65536); bcm2835_wr(bs, BCM2835_SPI_CLK, cdiv); /* handle all the 3-wire mode */ @@ -1354,6 +1355,7 @@ static int bcm2835_spi_probe(struct platform_device *pdev) return bs->irq ? bs->irq : -ENODEV; clk_prepare_enable(bs->clk); + bs->clk_hz = clk_get_rate(bs->clk); err = bcm2835_dma_init(ctlr, &pdev->dev, bs); if (err) From 68b11e8b1562986c134764433af64e97d30c9fc0 Mon Sep 17 00:00:00 2001 From: Pavel Begunkov Date: Tue, 20 Jul 2021 10:50:43 +0100 Subject: [PATCH 565/714] io_uring: explicitly count entries for poll reqs If __io_queue_proc() fails to add a second poll entry, e.g. kmalloc() failed, but it goes on with a third waitqueue, it may succeed and overwrite the error status. Count the number of poll entries we added, so we can set pt->error to zero at the beginning and find out when the mentioned scenario happens. Cc: stable@vger.kernel.org Fixes: 18bceab101add ("io_uring: allow POLL_ADD with double poll_wait() users") Signed-off-by: Pavel Begunkov Link: https://lore.kernel.org/r/9d6b9e561f88bcc0163623b74a76c39f712151c3.1626774457.git.asml.silence@gmail.com Signed-off-by: Jens Axboe --- fs/io_uring.c | 16 ++++++++++------ 1 file changed, 10 insertions(+), 6 deletions(-) diff --git a/fs/io_uring.c b/fs/io_uring.c index 0cac361bf6b8de..6668902cf50c0c 100644 --- a/fs/io_uring.c +++ b/fs/io_uring.c @@ -4802,6 +4802,7 @@ IO_NETOP_FN(recv); struct io_poll_table { struct poll_table_struct pt; struct io_kiocb *req; + int nr_entries; int error; }; @@ -4995,11 +4996,11 @@ static void __io_queue_proc(struct io_poll_iocb *poll, struct io_poll_table *pt, struct io_kiocb *req = pt->req; /* - * If poll->head is already set, it's because the file being polled - * uses multiple waitqueues for poll handling (eg one for read, one - * for write). Setup a separate io_poll_iocb if this happens. + * The file being polled uses multiple waitqueues for poll handling + * (e.g. one for read, one for write). Setup a separate io_poll_iocb + * if this happens. */ - if (unlikely(poll->head)) { + if (unlikely(pt->nr_entries)) { struct io_poll_iocb *poll_one = poll; /* already have a 2nd entry, fail a third attempt */ @@ -5027,7 +5028,7 @@ static void __io_queue_proc(struct io_poll_iocb *poll, struct io_poll_table *pt, *poll_ptr = poll; } - pt->error = 0; + pt->nr_entries++; poll->head = head; if (poll->events & EPOLLEXCLUSIVE) @@ -5104,9 +5105,12 @@ static __poll_t __io_arm_poll_handler(struct io_kiocb *req, ipt->pt._key = mask; ipt->req = req; - ipt->error = -EINVAL; + ipt->error = 0; + ipt->nr_entries = 0; mask = vfs_poll(req->file, &ipt->pt) & poll->events; + if (unlikely(!ipt->nr_entries) && !ipt->error) + ipt->error = -EINVAL; spin_lock_irq(&ctx->completion_lock); if (likely(poll->head)) { From 46fee9ab02cb24979bbe07631fc3ae95ae08aa3e Mon Sep 17 00:00:00 2001 From: Pavel Begunkov Date: Tue, 20 Jul 2021 10:50:44 +0100 Subject: [PATCH 566/714] io_uring: remove double poll entry on arm failure __io_queue_proc() can enqueue both poll entries and still fail afterwards, so the callers trying to cancel it should also try to remove the second poll entry (if any). For example, it may leave the request alive referencing a io_uring context but not accessible for cancellation: [ 282.599913][ T1620] task:iou-sqp-23145 state:D stack:28720 pid:23155 ppid: 8844 flags:0x00004004 [ 282.609927][ T1620] Call Trace: [ 282.613711][ T1620] __schedule+0x93a/0x26f0 [ 282.634647][ T1620] schedule+0xd3/0x270 [ 282.638874][ T1620] io_uring_cancel_generic+0x54d/0x890 [ 282.660346][ T1620] io_sq_thread+0xaac/0x1250 [ 282.696394][ T1620] ret_from_fork+0x1f/0x30 Cc: stable@vger.kernel.org Fixes: 18bceab101add ("io_uring: allow POLL_ADD with double poll_wait() users") Reported-and-tested-by: syzbot+ac957324022b7132accf@syzkaller.appspotmail.com Signed-off-by: Pavel Begunkov Link: https://lore.kernel.org/r/0ec1228fc5eda4cb524eeda857da8efdc43c331c.1626774457.git.asml.silence@gmail.com Signed-off-by: Jens Axboe --- fs/io_uring.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/fs/io_uring.c b/fs/io_uring.c index 6668902cf50c0c..6486b54a0f6201 100644 --- a/fs/io_uring.c +++ b/fs/io_uring.c @@ -5113,6 +5113,8 @@ static __poll_t __io_arm_poll_handler(struct io_kiocb *req, ipt->error = -EINVAL; spin_lock_irq(&ctx->completion_lock); + if (ipt->error) + io_poll_remove_double(req); if (likely(poll->head)) { spin_lock(&poll->head->lock); if (unlikely(list_empty(&poll->wait.entry))) { From 362a9e65289284f36403058eea2462d0330c1f24 Mon Sep 17 00:00:00 2001 From: Yang Yingliang Date: Tue, 20 Jul 2021 16:38:05 +0800 Subject: [PATCH 567/714] io_uring: fix memleak in io_init_wq_offload() I got memory leak report when doing fuzz test: BUG: memory leak unreferenced object 0xffff888107310a80 (size 96): comm "syz-executor.6", pid 4610, jiffies 4295140240 (age 20.135s) hex dump (first 32 bytes): 01 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 ................ 00 00 00 00 ad 4e ad de ff ff ff ff 00 00 00 00 .....N.......... backtrace: [<000000001974933b>] kmalloc include/linux/slab.h:591 [inline] [<000000001974933b>] kzalloc include/linux/slab.h:721 [inline] [<000000001974933b>] io_init_wq_offload fs/io_uring.c:7920 [inline] [<000000001974933b>] io_uring_alloc_task_context+0x466/0x640 fs/io_uring.c:7955 [<0000000039d0800d>] __io_uring_add_tctx_node+0x256/0x360 fs/io_uring.c:9016 [<000000008482e78c>] io_uring_add_tctx_node fs/io_uring.c:9052 [inline] [<000000008482e78c>] __do_sys_io_uring_enter fs/io_uring.c:9354 [inline] [<000000008482e78c>] __se_sys_io_uring_enter fs/io_uring.c:9301 [inline] [<000000008482e78c>] __x64_sys_io_uring_enter+0xabc/0xc20 fs/io_uring.c:9301 [<00000000b875f18f>] do_syscall_x64 arch/x86/entry/common.c:50 [inline] [<00000000b875f18f>] do_syscall_64+0x3b/0x90 arch/x86/entry/common.c:80 [<000000006b0a8484>] entry_SYSCALL_64_after_hwframe+0x44/0xae CPU0 CPU1 io_uring_enter io_uring_enter io_uring_add_tctx_node io_uring_add_tctx_node __io_uring_add_tctx_node __io_uring_add_tctx_node io_uring_alloc_task_context io_uring_alloc_task_context io_init_wq_offload io_init_wq_offload hash = kzalloc hash = kzalloc ctx->hash_map = hash ctx->hash_map = hash <- one of the hash is leaked When calling io_uring_enter() in parallel, the 'hash_map' will be leaked, add uring_lock to protect 'hash_map'. Fixes: e941894eae31 ("io-wq: make buffered file write hashed work map per-ctx") Reported-by: Hulk Robot Signed-off-by: Yang Yingliang Reviewed-by: Pavel Begunkov Link: https://lore.kernel.org/r/20210720083805.3030730-1-yangyingliang@huawei.com Signed-off-by: Jens Axboe --- fs/io_uring.c | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/fs/io_uring.c b/fs/io_uring.c index 6486b54a0f6201..fe3d948658ad33 100644 --- a/fs/io_uring.c +++ b/fs/io_uring.c @@ -7905,15 +7905,19 @@ static struct io_wq *io_init_wq_offload(struct io_ring_ctx *ctx, struct io_wq_data data; unsigned int concurrency; + mutex_lock(&ctx->uring_lock); hash = ctx->hash_map; if (!hash) { hash = kzalloc(sizeof(*hash), GFP_KERNEL); - if (!hash) + if (!hash) { + mutex_unlock(&ctx->uring_lock); return ERR_PTR(-ENOMEM); + } refcount_set(&hash->refs, 1); init_waitqueue_head(&hash->wait); ctx->hash_map = hash; } + mutex_unlock(&ctx->uring_lock); data.hash = hash; data.task = task; From 3abab27c322e0f2acf981595aa8040c9164dc9fb Mon Sep 17 00:00:00 2001 From: Charles Baylis Date: Fri, 16 Jul 2021 17:43:12 +0100 Subject: [PATCH 568/714] drm: Return -ENOTTY for non-drm ioctls drm: Return -ENOTTY for non-drm ioctls Return -ENOTTY from drm_ioctl() when userspace passes in a cmd number which doesn't relate to the drm subsystem. Glibc uses the TCGETS ioctl to implement isatty(), and without this change isatty() returns it incorrectly returns true for drm devices. To test run this command: $ if [ -t 0 ]; then echo is a tty; fi < /dev/dri/card0 which shows "is a tty" without this patch. This may also modify memory which the userspace application is not expecting. Signed-off-by: Charles Baylis Cc: stable@vger.kernel.org Signed-off-by: Daniel Vetter Link: https://patchwork.freedesktop.org/patch/msgid/YPG3IBlzaMhfPqCr@stando.fishzet.co.uk --- drivers/gpu/drm/drm_ioctl.c | 3 +++ include/drm/drm_ioctl.h | 1 + 2 files changed, 4 insertions(+) diff --git a/drivers/gpu/drm/drm_ioctl.c b/drivers/gpu/drm/drm_ioctl.c index 98ae006616565a..f454e042408603 100644 --- a/drivers/gpu/drm/drm_ioctl.c +++ b/drivers/gpu/drm/drm_ioctl.c @@ -834,6 +834,9 @@ long drm_ioctl(struct file *filp, if (drm_dev_is_unplugged(dev)) return -ENODEV; + if (DRM_IOCTL_TYPE(cmd) != DRM_IOCTL_BASE) + return -ENOTTY; + is_driver_ioctl = nr >= DRM_COMMAND_BASE && nr < DRM_COMMAND_END; if (is_driver_ioctl) { diff --git a/include/drm/drm_ioctl.h b/include/drm/drm_ioctl.h index 10100a4bbe2ad6..afb27cb6a7bd84 100644 --- a/include/drm/drm_ioctl.h +++ b/include/drm/drm_ioctl.h @@ -68,6 +68,7 @@ typedef int drm_ioctl_compat_t(struct file *filp, unsigned int cmd, unsigned long arg); #define DRM_IOCTL_NR(n) _IOC_NR(n) +#define DRM_IOCTL_TYPE(n) _IOC_TYPE(n) #define DRM_MAJOR 226 /** From 7e777021780e9c373fc0c04d40b8407ce8c3b5d5 Mon Sep 17 00:00:00 2001 From: Eric Woudstra Date: Mon, 19 Jul 2021 20:23:57 +0200 Subject: [PATCH 569/714] mt7530 mt7530_fdb_write only set ivl bit vid larger than 1 Fixes my earlier patch which broke vlan unaware bridges. The IVL bit now only gets set for vid's larger than 1. Fixes: 11d8d98cbeef ("mt7530 fix mt7530_fdb_write vid missing ivl bit") Signed-off-by: Eric Woudstra Reviewed-by: Florian Fainelli Signed-off-by: David S. Miller --- drivers/net/dsa/mt7530.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/net/dsa/mt7530.c b/drivers/net/dsa/mt7530.c index 9e4df35f92ccdb..69f21b71614c5b 100644 --- a/drivers/net/dsa/mt7530.c +++ b/drivers/net/dsa/mt7530.c @@ -366,7 +366,8 @@ mt7530_fdb_write(struct mt7530_priv *priv, u16 vid, int i; reg[1] |= vid & CVID_MASK; - reg[1] |= ATA2_IVL; + if (vid > 1) + reg[1] |= ATA2_IVL; reg[2] |= (aging & AGE_TIMER_MASK) << AGE_TIMER; reg[2] |= (port_mask & PORT_MAP_MASK) << PORT_MAP; /* STATIC_ENT indicate that entry is static wouldn't From 727d6a8b7ef3d25080fad228b2c4a1d4da5999c6 Mon Sep 17 00:00:00 2001 From: Peilin Ye Date: Mon, 19 Jul 2021 16:41:24 -0700 Subject: [PATCH 570/714] net/sched: act_skbmod: Skip non-Ethernet packets Currently tcf_skbmod_act() assumes that packets use Ethernet as their L2 protocol, which is not always the case. As an example, for CAN devices: $ ip link add dev vcan0 type vcan $ ip link set up vcan0 $ tc qdisc add dev vcan0 root handle 1: htb $ tc filter add dev vcan0 parent 1: protocol ip prio 10 \ matchall action skbmod swap mac Doing the above silently corrupts all the packets. Do not perform skbmod actions for non-Ethernet packets. Fixes: 86da71b57383 ("net_sched: Introduce skbmod action") Reviewed-by: Cong Wang Signed-off-by: Peilin Ye Signed-off-by: David S. Miller --- net/sched/act_skbmod.c | 12 ++++++++---- 1 file changed, 8 insertions(+), 4 deletions(-) diff --git a/net/sched/act_skbmod.c b/net/sched/act_skbmod.c index 81a1c67335be62..8d17a543cc9fef 100644 --- a/net/sched/act_skbmod.c +++ b/net/sched/act_skbmod.c @@ -6,6 +6,7 @@ */ #include +#include #include #include #include @@ -33,6 +34,13 @@ static int tcf_skbmod_act(struct sk_buff *skb, const struct tc_action *a, tcf_lastuse_update(&d->tcf_tm); bstats_cpu_update(this_cpu_ptr(d->common.cpu_bstats), skb); + action = READ_ONCE(d->tcf_action); + if (unlikely(action == TC_ACT_SHOT)) + goto drop; + + if (!skb->dev || skb->dev->type != ARPHRD_ETHER) + return action; + /* XXX: if you are going to edit more fields beyond ethernet header * (example when you add IP header replacement or vlan swap) * then MAX_EDIT_LEN needs to change appropriately @@ -41,10 +49,6 @@ static int tcf_skbmod_act(struct sk_buff *skb, const struct tc_action *a, if (unlikely(err)) /* best policy is to drop on the floor */ goto drop; - action = READ_ONCE(d->tcf_action); - if (unlikely(action == TC_ACT_SHOT)) - goto drop; - p = rcu_dereference_bh(d->skbmod_p); flags = p->flags; if (flags & SKBMOD_F_DMAC) From 75d5641497a60bb5d36ff77fd3f526906cbc148c Mon Sep 17 00:00:00 2001 From: Maxim Kochetkov Date: Tue, 20 Jul 2021 08:08:38 +0300 Subject: [PATCH 571/714] fsl/fman: Add fibre support Set SUPPORTED_FIBRE to mac_dev->if_support. It allows proper usage of PHYs with optical/fiber support. Signed-off-by: Maxim Kochetkov Acked-by: Madalin Bucur Signed-off-by: David S. Miller --- drivers/net/ethernet/freescale/fman/mac.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/net/ethernet/freescale/fman/mac.c b/drivers/net/ethernet/freescale/fman/mac.c index 46ecb42f2ef8e6..d9fc5c456bf3e2 100644 --- a/drivers/net/ethernet/freescale/fman/mac.c +++ b/drivers/net/ethernet/freescale/fman/mac.c @@ -524,6 +524,7 @@ static void setup_memac(struct mac_device *mac_dev) | SUPPORTED_Autoneg \ | SUPPORTED_Pause \ | SUPPORTED_Asym_Pause \ + | SUPPORTED_FIBRE \ | SUPPORTED_MII) static DEFINE_MUTEX(eth_lock); From 8fb4792f091e608a0a1d353dfdf07ef55a719db5 Mon Sep 17 00:00:00 2001 From: Paolo Abeni Date: Tue, 20 Jul 2021 15:08:40 +0200 Subject: [PATCH 572/714] ipv6: fix another slab-out-of-bounds in fib6_nh_flush_exceptions While running the self-tests on a KASAN enabled kernel, I observed a slab-out-of-bounds splat very similar to the one reported in commit 821bbf79fe46 ("ipv6: Fix KASAN: slab-out-of-bounds Read in fib6_nh_flush_exceptions"). We additionally need to take care of fib6_metrics initialization failure when the caller provides an nh. The fix is similar, explicitly free the route instead of calling fib6_info_release on a half-initialized object. Fixes: f88d8ea67fbdb ("ipv6: Plumb support for nexthop object in a fib6_info") Signed-off-by: Paolo Abeni Signed-off-by: David S. Miller --- net/ipv6/route.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/ipv6/route.c b/net/ipv6/route.c index 7b756a7dc03636..b6ddf23d38330d 100644 --- a/net/ipv6/route.c +++ b/net/ipv6/route.c @@ -3769,7 +3769,7 @@ static struct fib6_info *ip6_route_info_create(struct fib6_config *cfg, err = PTR_ERR(rt->fib6_metrics); /* Do not leave garbage there. */ rt->fib6_metrics = (struct dst_metrics *)&dst_default_metrics; - goto out; + goto out_free; } if (cfg->fc_flags & RTF_ADDRCONF) From 91bed5565bba03b2a9f7334b58ae4be9df7c3840 Mon Sep 17 00:00:00 2001 From: Jia He Date: Tue, 20 Jul 2021 21:26:55 +0800 Subject: [PATCH 573/714] Revert "qed: fix possible unpaired spin_{un}lock_bh in _qed_mcp_cmd_and_union()" This reverts commit 6206b7981a36476f4695d661ae139f7db36a802d. That patch added additional spin_{un}lock_bh(), which was harmless but pointless. The orginal code path has guaranteed the pair of spin_{un}lock_bh(). We'd better revert it before we find the exact root cause of the bug_on mentioned in that patch. Fixes: 6206b7981a36 ("qed: fix possible unpaired spin_{un}lock_bh in _qed_mcp_cmd_and_union()") Cc: David S. Miller Cc: Prabhakar Kushwaha Signed-off-by: Jia He Signed-off-by: David S. Miller --- drivers/net/ethernet/qlogic/qed/qed_mcp.c | 23 ++++++----------------- 1 file changed, 6 insertions(+), 17 deletions(-) diff --git a/drivers/net/ethernet/qlogic/qed/qed_mcp.c b/drivers/net/ethernet/qlogic/qed/qed_mcp.c index 79d879a5d66363..4387292c37e2f9 100644 --- a/drivers/net/ethernet/qlogic/qed/qed_mcp.c +++ b/drivers/net/ethernet/qlogic/qed/qed_mcp.c @@ -474,18 +474,14 @@ _qed_mcp_cmd_and_union(struct qed_hwfn *p_hwfn, spin_lock_bh(&p_hwfn->mcp_info->cmd_lock); - if (!qed_mcp_has_pending_cmd(p_hwfn)) { - spin_unlock_bh(&p_hwfn->mcp_info->cmd_lock); + if (!qed_mcp_has_pending_cmd(p_hwfn)) break; - } rc = qed_mcp_update_pending_cmd(p_hwfn, p_ptt); - if (!rc) { - spin_unlock_bh(&p_hwfn->mcp_info->cmd_lock); + if (!rc) break; - } else if (rc != -EAGAIN) { + else if (rc != -EAGAIN) goto err; - } spin_unlock_bh(&p_hwfn->mcp_info->cmd_lock); @@ -502,8 +498,6 @@ _qed_mcp_cmd_and_union(struct qed_hwfn *p_hwfn, return -EAGAIN; } - spin_lock_bh(&p_hwfn->mcp_info->cmd_lock); - /* Send the mailbox command */ qed_mcp_reread_offsets(p_hwfn, p_ptt); seq_num = ++p_hwfn->mcp_info->drv_mb_seq; @@ -530,18 +524,14 @@ _qed_mcp_cmd_and_union(struct qed_hwfn *p_hwfn, spin_lock_bh(&p_hwfn->mcp_info->cmd_lock); - if (p_cmd_elem->b_is_completed) { - spin_unlock_bh(&p_hwfn->mcp_info->cmd_lock); + if (p_cmd_elem->b_is_completed) break; - } rc = qed_mcp_update_pending_cmd(p_hwfn, p_ptt); - if (!rc) { - spin_unlock_bh(&p_hwfn->mcp_info->cmd_lock); + if (!rc) break; - } else if (rc != -EAGAIN) { + else if (rc != -EAGAIN) goto err; - } spin_unlock_bh(&p_hwfn->mcp_info->cmd_lock); } while (++cnt < max_retries); @@ -564,7 +554,6 @@ _qed_mcp_cmd_and_union(struct qed_hwfn *p_hwfn, return -EAGAIN; } - spin_lock_bh(&p_hwfn->mcp_info->cmd_lock); qed_mcp_cmd_del_elem(p_hwfn, p_cmd_elem); spin_unlock_bh(&p_hwfn->mcp_info->cmd_lock); From e81d71e343c6c62cf323042caed4b7ca049deda5 Mon Sep 17 00:00:00 2001 From: Kai Vehmanen Date: Tue, 20 Jul 2021 18:32:16 +0300 Subject: [PATCH 574/714] ALSA: hda/hdmi: Add quirk to force pin connectivity on NUC10 On some Intel NUC10 variants, codec reports AC_JACK_PORT_NONE as pin default config for all pins. This results in broken audio. Add a quirk to force connectivity. BugLink: https://github.com/clearlinux/distribution/issues/2396 Signed-off-by: Kai Vehmanen Link: https://lore.kernel.org/r/20210720153216.2200938-1-kai.vehmanen@linux.intel.com Signed-off-by: Takashi Iwai --- sound/pci/hda/patch_hdmi.c | 1 + 1 file changed, 1 insertion(+) diff --git a/sound/pci/hda/patch_hdmi.c b/sound/pci/hda/patch_hdmi.c index 84c088912b3c00..e143e69d8184f3 100644 --- a/sound/pci/hda/patch_hdmi.c +++ b/sound/pci/hda/patch_hdmi.c @@ -1941,6 +1941,7 @@ static const struct snd_pci_quirk force_connect_list[] = { SND_PCI_QUIRK(0x103c, 0x870f, "HP", 1), SND_PCI_QUIRK(0x103c, 0x871a, "HP", 1), SND_PCI_QUIRK(0x1462, 0xec94, "MS-7C94", 1), + SND_PCI_QUIRK(0x8086, 0x2081, "Intel NUC 10", 1), {} }; From 8798d070d416d18a75770fc19787e96705073f43 Mon Sep 17 00:00:00 2001 From: Ilya Dryomov Date: Sat, 3 Jul 2021 11:56:55 +0200 Subject: [PATCH 575/714] rbd: always kick acquire on "acquired" and "released" notifications Skipping the "lock has been released" notification if the lock owner is not what we expect based on owner_cid can lead to I/O hangs. One example is our own notifications: because owner_cid is cleared in rbd_unlock(), when we get our own notification it is processed as unexpected/duplicate and maybe_kick_acquire() isn't called. If a peer that requested the lock then doesn't go through with acquiring it, I/O requests that came in while the lock was being quiesced would be stalled until another I/O request is submitted and kicks acquire from rbd_img_exclusive_lock(). This makes the comment in rbd_release_lock() actually true: prior to this change the canceled work was being requeued in response to the "lock has been acquired" notification from rbd_handle_acquired_lock(). Cc: stable@vger.kernel.org # 5.3+ Signed-off-by: Ilya Dryomov Tested-by: Robin Geuze --- drivers/block/rbd.c | 20 +++++++------------- 1 file changed, 7 insertions(+), 13 deletions(-) diff --git a/drivers/block/rbd.c b/drivers/block/rbd.c index 531d390902dd65..e77cea6a618906 100644 --- a/drivers/block/rbd.c +++ b/drivers/block/rbd.c @@ -4203,15 +4203,11 @@ static void rbd_handle_acquired_lock(struct rbd_device *rbd_dev, u8 struct_v, if (!rbd_cid_equal(&cid, &rbd_empty_cid)) { down_write(&rbd_dev->lock_rwsem); if (rbd_cid_equal(&cid, &rbd_dev->owner_cid)) { - /* - * we already know that the remote client is - * the owner - */ - up_write(&rbd_dev->lock_rwsem); - return; + dout("%s rbd_dev %p cid %llu-%llu == owner_cid\n", + __func__, rbd_dev, cid.gid, cid.handle); + } else { + rbd_set_owner_cid(rbd_dev, &cid); } - - rbd_set_owner_cid(rbd_dev, &cid); downgrade_write(&rbd_dev->lock_rwsem); } else { down_read(&rbd_dev->lock_rwsem); @@ -4236,14 +4232,12 @@ static void rbd_handle_released_lock(struct rbd_device *rbd_dev, u8 struct_v, if (!rbd_cid_equal(&cid, &rbd_empty_cid)) { down_write(&rbd_dev->lock_rwsem); if (!rbd_cid_equal(&cid, &rbd_dev->owner_cid)) { - dout("%s rbd_dev %p unexpected owner, cid %llu-%llu != owner_cid %llu-%llu\n", + dout("%s rbd_dev %p cid %llu-%llu != owner_cid %llu-%llu\n", __func__, rbd_dev, cid.gid, cid.handle, rbd_dev->owner_cid.gid, rbd_dev->owner_cid.handle); - up_write(&rbd_dev->lock_rwsem); - return; + } else { + rbd_set_owner_cid(rbd_dev, &rbd_empty_cid); } - - rbd_set_owner_cid(rbd_dev, &rbd_empty_cid); downgrade_write(&rbd_dev->lock_rwsem); } else { down_read(&rbd_dev->lock_rwsem); From ed9eb71085ecb7ded9a5118cec2ab70667cc7350 Mon Sep 17 00:00:00 2001 From: Ilya Dryomov Date: Sat, 3 Jul 2021 11:31:26 +0200 Subject: [PATCH 576/714] rbd: don't hold lock_rwsem while running_list is being drained Currently rbd_quiesce_lock() holds lock_rwsem for read while blocking on releasing_wait completion. On the I/O completion side, each image request also needs to take lock_rwsem for read. Because rw_semaphore implementation doesn't allow new readers after a writer has indicated interest in the lock, this can result in a deadlock if something that needs to take lock_rwsem for write gets involved. For example: 1. watch error occurs 2. rbd_watch_errcb() takes lock_rwsem for write, clears owner_cid and releases lock_rwsem 3. after reestablishing the watch, rbd_reregister_watch() takes lock_rwsem for write and calls rbd_reacquire_lock() 4. rbd_quiesce_lock() downgrades lock_rwsem to for read and blocks on releasing_wait until running_list becomes empty 5. another watch error occurs 6. rbd_watch_errcb() blocks trying to take lock_rwsem for write 7. no in-flight image request can complete and delete itself from running_list because lock_rwsem won't be granted anymore A similar scenario can occur with "lock has been acquired" and "lock has been released" notification handers which also take lock_rwsem for write to update owner_cid. We don't actually get anything useful from sitting on lock_rwsem in rbd_quiesce_lock() -- owner_cid updates certainly don't need to be synchronized with. In fact the whole owner_cid tracking logic could probably be removed from the kernel client because we don't support proxied maintenance operations. Cc: stable@vger.kernel.org # 5.3+ URL: https://tracker.ceph.com/issues/42757 Signed-off-by: Ilya Dryomov Tested-by: Robin Geuze --- drivers/block/rbd.c | 12 +++++------- 1 file changed, 5 insertions(+), 7 deletions(-) diff --git a/drivers/block/rbd.c b/drivers/block/rbd.c index e77cea6a618906..784797fa9a537c 100644 --- a/drivers/block/rbd.c +++ b/drivers/block/rbd.c @@ -4100,8 +4100,6 @@ static void rbd_acquire_lock(struct work_struct *work) static bool rbd_quiesce_lock(struct rbd_device *rbd_dev) { - bool need_wait; - dout("%s rbd_dev %p\n", __func__, rbd_dev); lockdep_assert_held_write(&rbd_dev->lock_rwsem); @@ -4113,11 +4111,11 @@ static bool rbd_quiesce_lock(struct rbd_device *rbd_dev) */ rbd_dev->lock_state = RBD_LOCK_STATE_RELEASING; rbd_assert(!completion_done(&rbd_dev->releasing_wait)); - need_wait = !list_empty(&rbd_dev->running_list); - downgrade_write(&rbd_dev->lock_rwsem); - if (need_wait) - wait_for_completion(&rbd_dev->releasing_wait); - up_read(&rbd_dev->lock_rwsem); + if (list_empty(&rbd_dev->running_list)) + return true; + + up_write(&rbd_dev->lock_rwsem); + wait_for_completion(&rbd_dev->releasing_wait); down_write(&rbd_dev->lock_rwsem); if (rbd_dev->lock_state != RBD_LOCK_STATE_RELEASING) From cdb330f4b41ab55feb35487729e883c9e08b8a54 Mon Sep 17 00:00:00 2001 From: Luis Henriques Date: Thu, 15 Jul 2021 14:40:39 +0100 Subject: [PATCH 577/714] ceph: don't WARN if we're still opening a session to an MDS If MDSs aren't available while mounting a filesystem, the session state will transition from SESSION_OPENING to SESSION_CLOSING. And in that scenario check_session_state() will be called from delayed_work() and trigger this WARN. Avoid this by only WARNing after a session has already been established (i.e., the s_ttl will be different from 0). Fixes: 62575e270f66 ("ceph: check session state after bumping session->s_seq") Signed-off-by: Luis Henriques Reviewed-by: Jeff Layton Signed-off-by: Ilya Dryomov --- fs/ceph/mds_client.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/ceph/mds_client.c b/fs/ceph/mds_client.c index a818213c972fc0..9db1b39df7737f 100644 --- a/fs/ceph/mds_client.c +++ b/fs/ceph/mds_client.c @@ -4456,7 +4456,7 @@ bool check_session_state(struct ceph_mds_session *s) break; case CEPH_MDS_SESSION_CLOSING: /* Should never reach this when we're unmounting */ - WARN_ON_ONCE(true); + WARN_ON_ONCE(s->s_ttl); fallthrough; case CEPH_MDS_SESSION_NEW: case CEPH_MDS_SESSION_RESTARTING: From 463f36c76fa4ec015c640ff63ccf52e7527abee0 Mon Sep 17 00:00:00 2001 From: Alexander Egorenkov Date: Fri, 16 Jul 2021 22:00:22 +0200 Subject: [PATCH 578/714] s390/boot: fix use of expolines in the DMA code The DMA code section of the decompressor must be compiled with expolines if Spectre V2 mitigation has been enabled for the decompressed kernel. This is required because although the decompressor's image contains the DMA code section, it is handed over to the decompressed kernel for use. Because the DMA code is already slow w/o expolines, use expolines always regardless whether the decompressed kernel is using them or not. This simplifies the DMA code by dropping the conditional compilation of expolines. Fixes: bf72630130c2 ("s390: use proper expoline sections for .dma code") Cc: # 5.2 Signed-off-by: Alexander Egorenkov Reviewed-by: Heiko Carstens Signed-off-by: Heiko Carstens --- arch/s390/boot/text_dma.S | 19 ++++--------------- 1 file changed, 4 insertions(+), 15 deletions(-) diff --git a/arch/s390/boot/text_dma.S b/arch/s390/boot/text_dma.S index f7c77cd518f2ba..5ff5fee0280161 100644 --- a/arch/s390/boot/text_dma.S +++ b/arch/s390/boot/text_dma.S @@ -9,16 +9,6 @@ #include #include -#ifdef CC_USING_EXPOLINE - .pushsection .dma.text.__s390_indirect_jump_r14,"axG" -__dma__s390_indirect_jump_r14: - larl %r1,0f - ex 0,0(%r1) - j . -0: br %r14 - .popsection -#endif - .section .dma.text,"ax" /* * Simplified version of expoline thunk. The normal thunks can not be used here, @@ -27,11 +17,10 @@ __dma__s390_indirect_jump_r14: * affects a few functions that are not performance-relevant. */ .macro BR_EX_DMA_r14 -#ifdef CC_USING_EXPOLINE - jg __dma__s390_indirect_jump_r14 -#else - br %r14 -#endif + larl %r1,0f + ex 0,0(%r1) + j . +0: br %r14 .endm /* From 7d244643758e4cb51a29f948f6be3edd15d92cc3 Mon Sep 17 00:00:00 2001 From: kernel test robot Date: Mon, 19 Jul 2021 12:41:41 +0800 Subject: [PATCH 579/714] s390/cpumf: fix semicolon.cocci warnings arch/s390/kernel/perf_cpum_cf.c:748:2-3: Unneeded semicolon Remove unneeded semicolon. Generated by: scripts/coccinelle/misc/semicolon.cocci Fixes: a029a4eab39e ("s390/cpumf: Allow concurrent access for CPU Measurement Counter Facility") CC: Thomas Richter Reported-by: kernel test robot Signed-off-by: kernel test robot Signed-off-by: Heiko Carstens --- arch/s390/kernel/perf_cpum_cf.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/s390/kernel/perf_cpum_cf.c b/arch/s390/kernel/perf_cpum_cf.c index 975a00c8c564b3..d7dc36ec0a60e9 100644 --- a/arch/s390/kernel/perf_cpum_cf.c +++ b/arch/s390/kernel/perf_cpum_cf.c @@ -745,7 +745,7 @@ static int __init cpumf_pmu_init(void) if (!cf_dbg) { pr_err("Registration of s390dbf(cpum_cf) failed\n"); return -ENOMEM; - }; + } debug_register_view(cf_dbg, &debug_sprintf_view); cpumf_pmu.attr_groups = cpumf_cf_event_group(); From 0cde560a8bfc3cb790715f39d4535129cca9e6ae Mon Sep 17 00:00:00 2001 From: Heiko Carstens Date: Mon, 19 Jul 2021 21:58:21 +0200 Subject: [PATCH 580/714] s390: update defconfigs Signed-off-by: Heiko Carstens --- arch/s390/configs/debug_defconfig | 24 ++++++++++++++++-------- arch/s390/configs/defconfig | 24 +++++++++++++++++------- arch/s390/configs/zfcpdump_defconfig | 3 +-- 3 files changed, 34 insertions(+), 17 deletions(-) diff --git a/arch/s390/configs/debug_defconfig b/arch/s390/configs/debug_defconfig index 55cb846cda37d4..7de253f766e81d 100644 --- a/arch/s390/configs/debug_defconfig +++ b/arch/s390/configs/debug_defconfig @@ -5,7 +5,12 @@ CONFIG_WATCH_QUEUE=y CONFIG_AUDIT=y CONFIG_NO_HZ_IDLE=y CONFIG_HIGH_RES_TIMERS=y +CONFIG_BPF_SYSCALL=y +CONFIG_BPF_JIT=y +CONFIG_BPF_JIT_ALWAYS_ON=y +CONFIG_BPF_LSM=y CONFIG_PREEMPT=y +CONFIG_SCHED_CORE=y CONFIG_BSD_PROCESS_ACCT=y CONFIG_BSD_PROCESS_ACCT_V3=y CONFIG_TASKSTATS=y @@ -28,14 +33,13 @@ CONFIG_CGROUP_DEVICE=y CONFIG_CGROUP_CPUACCT=y CONFIG_CGROUP_PERF=y CONFIG_CGROUP_BPF=y +CONFIG_CGROUP_MISC=y CONFIG_NAMESPACES=y CONFIG_USER_NS=y CONFIG_CHECKPOINT_RESTORE=y CONFIG_SCHED_AUTOGROUP=y CONFIG_EXPERT=y # CONFIG_SYSFS_SYSCALL is not set -CONFIG_BPF_LSM=y -CONFIG_BPF_SYSCALL=y CONFIG_USERFAULTFD=y # CONFIG_COMPAT_BRK is not set CONFIG_PROFILING=y @@ -76,6 +80,7 @@ CONFIG_BLK_DEV_THROTTLING=y CONFIG_BLK_WBT=y CONFIG_BLK_CGROUP_IOLATENCY=y CONFIG_BLK_CGROUP_IOCOST=y +CONFIG_BLK_CGROUP_IOPRIO=y CONFIG_BLK_INLINE_ENCRYPTION=y CONFIG_BLK_INLINE_ENCRYPTION_FALLBACK=y CONFIG_PARTITION_ADVANCED=y @@ -95,6 +100,7 @@ CONFIG_CLEANCACHE=y CONFIG_FRONTSWAP=y CONFIG_CMA_DEBUG=y CONFIG_CMA_DEBUGFS=y +CONFIG_CMA_SYSFS=y CONFIG_CMA_AREAS=7 CONFIG_MEM_SOFT_DIRTY=y CONFIG_ZSWAP=y @@ -158,6 +164,7 @@ CONFIG_IPV6_RPL_LWTUNNEL=y CONFIG_MPTCP=y CONFIG_NETFILTER=y CONFIG_BRIDGE_NETFILTER=m +CONFIG_NETFILTER_NETLINK_HOOK=m CONFIG_NF_CONNTRACK=m CONFIG_NF_CONNTRACK_SECMARK=y CONFIG_NF_CONNTRACK_EVENTS=y @@ -280,6 +287,7 @@ CONFIG_IP_VS_FTP=m CONFIG_IP_VS_PE_SIP=m CONFIG_NFT_FIB_IPV4=m CONFIG_NF_TABLES_ARP=y +CONFIG_NF_LOG_IPV4=m CONFIG_IP_NF_IPTABLES=m CONFIG_IP_NF_MATCH_AH=m CONFIG_IP_NF_MATCH_ECN=m @@ -384,12 +392,11 @@ CONFIG_VSOCKETS=m CONFIG_VIRTIO_VSOCKETS=m CONFIG_NETLINK_DIAG=m CONFIG_CGROUP_NET_PRIO=y -CONFIG_BPF_JIT=y CONFIG_NET_PKTGEN=m CONFIG_PCI=y -CONFIG_PCI_IOV=y # CONFIG_PCIEASPM is not set CONFIG_PCI_DEBUG=y +CONFIG_PCI_IOV=y CONFIG_HOTPLUG_PCI=y CONFIG_HOTPLUG_PCI_S390=y CONFIG_DEVTMPFS=y @@ -496,6 +503,7 @@ CONFIG_NLMON=m # CONFIG_NET_VENDOR_GOOGLE is not set # CONFIG_NET_VENDOR_HUAWEI is not set # CONFIG_NET_VENDOR_INTEL is not set +# CONFIG_NET_VENDOR_MICROSOFT is not set # CONFIG_NET_VENDOR_MARVELL is not set CONFIG_MLX4_EN=m CONFIG_MLX5_CORE=m @@ -552,7 +560,6 @@ CONFIG_INPUT_EVDEV=y CONFIG_LEGACY_PTY_COUNT=0 CONFIG_VIRTIO_CONSOLE=m CONFIG_HW_RANDOM_VIRTIO=m -CONFIG_RAW_DRIVER=m CONFIG_HANGCHECK_TIMER=m CONFIG_TN3270_FS=y CONFIG_PPS=m @@ -575,7 +582,6 @@ CONFIG_SYNC_FILE=y CONFIG_VFIO=m CONFIG_VFIO_PCI=m CONFIG_VFIO_MDEV=m -CONFIG_VFIO_MDEV_DEVICE=m CONFIG_VIRTIO_PCI=m CONFIG_VIRTIO_BALLOON=m CONFIG_VIRTIO_INPUT=y @@ -620,6 +626,7 @@ CONFIG_FUSE_FS=y CONFIG_CUSE=m CONFIG_VIRTIO_FS=m CONFIG_OVERLAY_FS=m +CONFIG_NETFS_STATS=y CONFIG_FSCACHE=m CONFIG_CACHEFILES=m CONFIG_ISO9660_FS=y @@ -655,7 +662,6 @@ CONFIG_NFSD_V3_ACL=y CONFIG_NFSD_V4=y CONFIG_NFSD_V4_SECURITY_LABEL=y CONFIG_CIFS=m -CONFIG_CIFS_STATS2=y CONFIG_CIFS_WEAK_PW_HASH=y CONFIG_CIFS_UPCALL=y CONFIG_CIFS_XATTR=y @@ -683,6 +689,7 @@ CONFIG_SECURITY_SELINUX_BOOTPARAM=y CONFIG_SECURITY_SELINUX_DISABLE=y CONFIG_SECURITY_LOCKDOWN_LSM=y CONFIG_SECURITY_LOCKDOWN_LSM_EARLY=y +CONFIG_SECURITY_LANDLOCK=y CONFIG_INTEGRITY_SIGNATURE=y CONFIG_INTEGRITY_ASYMMETRIC_KEYS=y CONFIG_IMA=y @@ -697,6 +704,7 @@ CONFIG_CRYPTO_CRYPTD=m CONFIG_CRYPTO_TEST=m CONFIG_CRYPTO_DH=m CONFIG_CRYPTO_ECDH=m +CONFIG_CRYPTO_ECDSA=m CONFIG_CRYPTO_ECRDSA=m CONFIG_CRYPTO_SM2=m CONFIG_CRYPTO_CURVE25519=m @@ -844,7 +852,6 @@ CONFIG_FAULT_INJECTION_DEBUG_FS=y CONFIG_FAIL_FUNCTION=y CONFIG_FAULT_INJECTION_STACKTRACE_FILTER=y CONFIG_LKDTM=m -CONFIG_TEST_LIST_SORT=y CONFIG_TEST_MIN_HEAP=y CONFIG_TEST_SORT=y CONFIG_KPROBES_SANITY_TEST=y @@ -854,3 +861,4 @@ CONFIG_PERCPU_TEST=m CONFIG_ATOMIC64_SELFTEST=y CONFIG_TEST_BITOPS=m CONFIG_TEST_BPF=m +CONFIG_TEST_LIVEPATCH=m diff --git a/arch/s390/configs/defconfig b/arch/s390/configs/defconfig index 5d847ab5feaa68..b671642967ba61 100644 --- a/arch/s390/configs/defconfig +++ b/arch/s390/configs/defconfig @@ -4,6 +4,11 @@ CONFIG_WATCH_QUEUE=y CONFIG_AUDIT=y CONFIG_NO_HZ_IDLE=y CONFIG_HIGH_RES_TIMERS=y +CONFIG_BPF_SYSCALL=y +CONFIG_BPF_JIT=y +CONFIG_BPF_JIT_ALWAYS_ON=y +CONFIG_BPF_LSM=y +CONFIG_SCHED_CORE=y CONFIG_BSD_PROCESS_ACCT=y CONFIG_BSD_PROCESS_ACCT_V3=y CONFIG_TASKSTATS=y @@ -26,14 +31,13 @@ CONFIG_CGROUP_DEVICE=y CONFIG_CGROUP_CPUACCT=y CONFIG_CGROUP_PERF=y CONFIG_CGROUP_BPF=y +CONFIG_CGROUP_MISC=y CONFIG_NAMESPACES=y CONFIG_USER_NS=y CONFIG_CHECKPOINT_RESTORE=y CONFIG_SCHED_AUTOGROUP=y CONFIG_EXPERT=y # CONFIG_SYSFS_SYSCALL is not set -CONFIG_BPF_LSM=y -CONFIG_BPF_SYSCALL=y CONFIG_USERFAULTFD=y # CONFIG_COMPAT_BRK is not set CONFIG_PROFILING=y @@ -70,6 +74,7 @@ CONFIG_BLK_DEV_THROTTLING=y CONFIG_BLK_WBT=y CONFIG_BLK_CGROUP_IOLATENCY=y CONFIG_BLK_CGROUP_IOCOST=y +CONFIG_BLK_CGROUP_IOPRIO=y CONFIG_BLK_INLINE_ENCRYPTION=y CONFIG_BLK_INLINE_ENCRYPTION_FALLBACK=y CONFIG_PARTITION_ADVANCED=y @@ -87,6 +92,7 @@ CONFIG_KSM=y CONFIG_TRANSPARENT_HUGEPAGE=y CONFIG_CLEANCACHE=y CONFIG_FRONTSWAP=y +CONFIG_CMA_SYSFS=y CONFIG_CMA_AREAS=7 CONFIG_MEM_SOFT_DIRTY=y CONFIG_ZSWAP=y @@ -149,6 +155,7 @@ CONFIG_IPV6_RPL_LWTUNNEL=y CONFIG_MPTCP=y CONFIG_NETFILTER=y CONFIG_BRIDGE_NETFILTER=m +CONFIG_NETFILTER_NETLINK_HOOK=m CONFIG_NF_CONNTRACK=m CONFIG_NF_CONNTRACK_SECMARK=y CONFIG_NF_CONNTRACK_EVENTS=y @@ -271,6 +278,7 @@ CONFIG_IP_VS_FTP=m CONFIG_IP_VS_PE_SIP=m CONFIG_NFT_FIB_IPV4=m CONFIG_NF_TABLES_ARP=y +CONFIG_NF_LOG_IPV4=m CONFIG_IP_NF_IPTABLES=m CONFIG_IP_NF_MATCH_AH=m CONFIG_IP_NF_MATCH_ECN=m @@ -374,11 +382,10 @@ CONFIG_VSOCKETS=m CONFIG_VIRTIO_VSOCKETS=m CONFIG_NETLINK_DIAG=m CONFIG_CGROUP_NET_PRIO=y -CONFIG_BPF_JIT=y CONFIG_NET_PKTGEN=m CONFIG_PCI=y -CONFIG_PCI_IOV=y # CONFIG_PCIEASPM is not set +CONFIG_PCI_IOV=y CONFIG_HOTPLUG_PCI=y CONFIG_HOTPLUG_PCI_S390=y CONFIG_UEVENT_HELPER=y @@ -488,6 +495,7 @@ CONFIG_NLMON=m # CONFIG_NET_VENDOR_GOOGLE is not set # CONFIG_NET_VENDOR_HUAWEI is not set # CONFIG_NET_VENDOR_INTEL is not set +# CONFIG_NET_VENDOR_MICROSOFT is not set # CONFIG_NET_VENDOR_MARVELL is not set CONFIG_MLX4_EN=m CONFIG_MLX5_CORE=m @@ -544,7 +552,6 @@ CONFIG_INPUT_EVDEV=y CONFIG_LEGACY_PTY_COUNT=0 CONFIG_VIRTIO_CONSOLE=m CONFIG_HW_RANDOM_VIRTIO=m -CONFIG_RAW_DRIVER=m CONFIG_HANGCHECK_TIMER=m CONFIG_TN3270_FS=y # CONFIG_PTP_1588_CLOCK is not set @@ -567,7 +574,6 @@ CONFIG_SYNC_FILE=y CONFIG_VFIO=m CONFIG_VFIO_PCI=m CONFIG_VFIO_MDEV=m -CONFIG_VFIO_MDEV_DEVICE=m CONFIG_VIRTIO_PCI=m CONFIG_VIRTIO_BALLOON=m CONFIG_VIRTIO_INPUT=y @@ -608,6 +614,7 @@ CONFIG_FUSE_FS=y CONFIG_CUSE=m CONFIG_VIRTIO_FS=m CONFIG_OVERLAY_FS=m +CONFIG_NETFS_STATS=y CONFIG_FSCACHE=m CONFIG_CACHEFILES=m CONFIG_ISO9660_FS=y @@ -643,7 +650,6 @@ CONFIG_NFSD_V3_ACL=y CONFIG_NFSD_V4=y CONFIG_NFSD_V4_SECURITY_LABEL=y CONFIG_CIFS=m -CONFIG_CIFS_STATS2=y CONFIG_CIFS_WEAK_PW_HASH=y CONFIG_CIFS_UPCALL=y CONFIG_CIFS_XATTR=y @@ -670,6 +676,7 @@ CONFIG_SECURITY_SELINUX_BOOTPARAM=y CONFIG_SECURITY_SELINUX_DISABLE=y CONFIG_SECURITY_LOCKDOWN_LSM=y CONFIG_SECURITY_LOCKDOWN_LSM_EARLY=y +CONFIG_SECURITY_LANDLOCK=y CONFIG_INTEGRITY_SIGNATURE=y CONFIG_INTEGRITY_ASYMMETRIC_KEYS=y CONFIG_IMA=y @@ -685,6 +692,7 @@ CONFIG_CRYPTO_CRYPTD=m CONFIG_CRYPTO_TEST=m CONFIG_CRYPTO_DH=m CONFIG_CRYPTO_ECDH=m +CONFIG_CRYPTO_ECDSA=m CONFIG_CRYPTO_ECRDSA=m CONFIG_CRYPTO_SM2=m CONFIG_CRYPTO_CURVE25519=m @@ -755,6 +763,7 @@ CONFIG_CRC8=m CONFIG_DMA_CMA=y CONFIG_CMA_SIZE_MBYTES=0 CONFIG_PRINTK_TIME=y +CONFIG_DYNAMIC_DEBUG=y CONFIG_DEBUG_INFO=y CONFIG_DEBUG_INFO_DWARF4=y CONFIG_GDB_SCRIPTS=y @@ -782,3 +791,4 @@ CONFIG_LKDTM=m CONFIG_PERCPU_TEST=m CONFIG_ATOMIC64_SELFTEST=y CONFIG_TEST_BPF=m +CONFIG_TEST_LIVEPATCH=m diff --git a/arch/s390/configs/zfcpdump_defconfig b/arch/s390/configs/zfcpdump_defconfig index 76123a4b26ab06..d576aaab27c919 100644 --- a/arch/s390/configs/zfcpdump_defconfig +++ b/arch/s390/configs/zfcpdump_defconfig @@ -29,9 +29,9 @@ CONFIG_PARTITION_ADVANCED=y # CONFIG_CORE_DUMP_DEFAULT_ELF_HEADERS is not set # CONFIG_COMPACTION is not set # CONFIG_MIGRATION is not set -# CONFIG_BOUNCE is not set CONFIG_NET=y # CONFIG_IUCV is not set +# CONFIG_PCPU_DEV_REFCNT is not set # CONFIG_ETHTOOL_NETLINK is not set CONFIG_DEVTMPFS=y CONFIG_BLK_DEV_RAM=y @@ -51,7 +51,6 @@ CONFIG_ZFCP=y # CONFIG_SERIO is not set # CONFIG_HVC_IUCV is not set # CONFIG_HW_RANDOM_S390 is not set -CONFIG_RAW_DRIVER=y # CONFIG_HMC_DRV is not set # CONFIG_S390_TAPE is not set # CONFIG_VMCP is not set From 4a8ac5e45cdaa88884b4ce05303e304cbabeb367 Mon Sep 17 00:00:00 2001 From: Chris Packham Date: Fri, 16 Jul 2021 08:58:32 +1200 Subject: [PATCH 581/714] i2c: mpc: Poll for MCF During some transfers the bus can still be busy when an interrupt is received. Commit 763778cd7926 ("i2c: mpc: Restore reread of I2C status register") attempted to address this by re-reading MPC_I2C_SR once but that just made it less likely to happen without actually preventing it. Instead of a single re-read, poll with a timeout so that the bus is given enough time to settle but a genuine stuck SCL is still noticed. Fixes: 1538d82f4647 ("i2c: mpc: Interrupt driven transfer") Signed-off-by: Chris Packham Signed-off-by: Wolfram Sang --- drivers/i2c/busses/i2c-mpc.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/i2c/busses/i2c-mpc.c b/drivers/i2c/busses/i2c-mpc.c index 6d5014ebaab5e8..a6ea1eb1394e1c 100644 --- a/drivers/i2c/busses/i2c-mpc.c +++ b/drivers/i2c/busses/i2c-mpc.c @@ -635,8 +635,8 @@ static irqreturn_t mpc_i2c_isr(int irq, void *dev_id) status = readb(i2c->base + MPC_I2C_SR); if (status & CSR_MIF) { - /* Read again to allow register to stabilise */ - status = readb(i2c->base + MPC_I2C_SR); + /* Wait up to 100us for transfer to properly complete */ + readb_poll_timeout(i2c->base + MPC_I2C_SR, status, !(status & CSR_MCF), 0, 100); writeb(0, i2c->base + MPC_I2C_SR); mpc_i2c_do_intr(i2c, status); return IRQ_HANDLED; From 09cfae9f13d51700b0fecf591dcd658fc5375428 Mon Sep 17 00:00:00 2001 From: Markus Boehme Date: Tue, 20 Jul 2021 16:26:19 -0700 Subject: [PATCH 582/714] ixgbe: Fix packet corruption due to missing DMA sync When receiving a packet with multiple fragments, hardware may still touch the first fragment until the entire packet has been received. The driver therefore keeps the first fragment mapped for DMA until end of packet has been asserted, and delays its dma_sync call until then. The driver tries to fit multiple receive buffers on one page. When using 3K receive buffers (e.g. using Jumbo frames and legacy-rx is turned off/build_skb is being used) on an architecture with 4K pages, the driver allocates an order 1 compound page and uses one page per receive buffer. To determine the correct offset for a delayed DMA sync of the first fragment of a multi-fragment packet, the driver then cannot just use PAGE_MASK on the DMA address but has to construct a mask based on the actual size of the backing page. Using PAGE_MASK in the 3K RX buffer/4K page architecture configuration will always sync the first page of a compound page. With the SWIOTLB enabled this can lead to corrupted packets (zeroed out first fragment, re-used garbage from another packet) and various consequences, such as slow/stalling data transfers and connection resets. For example, testing on a link with MTU exceeding 3058 bytes on a host with SWIOTLB enabled (e.g. "iommu=soft swiotlb=262144,force") TCP transfers quickly fizzle out without this patch. Cc: stable@vger.kernel.org Fixes: 0c5661ecc5dd7 ("ixgbe: fix crash in build_skb Rx code path") Signed-off-by: Markus Boehme Tested-by: Tony Brelinski Signed-off-by: Tony Nguyen Signed-off-by: David S. Miller --- drivers/net/ethernet/intel/ixgbe/ixgbe_main.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c b/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c index 913253f8ecb4ec..14aea40da50fb8 100644 --- a/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c +++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c @@ -1825,7 +1825,8 @@ static void ixgbe_dma_sync_frag(struct ixgbe_ring *rx_ring, struct sk_buff *skb) { if (ring_uses_build_skb(rx_ring)) { - unsigned long offset = (unsigned long)(skb->data) & ~PAGE_MASK; + unsigned long mask = (unsigned long)ixgbe_rx_pg_size(rx_ring) - 1; + unsigned long offset = (unsigned long)(skb->data) & mask; dma_sync_single_range_for_cpu(rx_ring->dev, IXGBE_CB(skb)->dma, From a47fa41381a09e5997afd762664db4f5f6657e03 Mon Sep 17 00:00:00 2001 From: David Disseldorp Date: Wed, 21 Jul 2021 00:55:22 +0200 Subject: [PATCH 583/714] scsi: target: Fix NULL dereference on XCOPY completion CPU affinity control added with commit 39ae3edda325 ("scsi: target: core: Make completion affinity configurable") makes target_complete_cmd() queue work on a CPU based on se_tpg->se_tpg_wwn->cmd_compl_affinity state. LIO's EXTENDED COPY worker is a special case in that read/write cmds are dispatched using the global xcopy_pt_tpg, which carries a NULL se_tpg_wwn pointer following initialization in target_xcopy_setup_pt(). The NULL xcopy_pt_tpg->se_tpg_wwn pointer is dereferenced on completion of any EXTENDED COPY initiated read/write cmds. E.g using the libiscsi SCSI.ExtendedCopy.Simple test: BUG: kernel NULL pointer dereference, address: 00000000000001a8 RIP: 0010:target_complete_cmd+0x9d/0x130 [target_core_mod] Call Trace: fd_execute_rw+0x148/0x42a [target_core_file] ? __dynamic_pr_debug+0xa7/0xe0 ? target_check_reservation+0x5b/0x940 [target_core_mod] __target_execute_cmd+0x1e/0x90 [target_core_mod] transport_generic_new_cmd+0x17c/0x330 [target_core_mod] target_xcopy_issue_pt_cmd+0x9/0x60 [target_core_mod] target_xcopy_read_source.isra.7+0x10b/0x1b0 [target_core_mod] ? target_check_fua+0x40/0x40 [target_core_mod] ? transport_complete_task_attr+0x130/0x130 [target_core_mod] target_xcopy_do_work+0x61f/0xc00 [target_core_mod] This fix makes target_complete_cmd() queue work on se_cmd->cpuid if se_tpg_wwn is NULL. Link: https://lore.kernel.org/r/20210720225522.26291-1-ddiss@suse.de Fixes: 39ae3edda325 ("scsi: target: core: Make completion affinity configurable") Cc: Lee Duncan Cc: Mike Christie Reviewed-by: Mike Christie Signed-off-by: David Disseldorp Signed-off-by: Martin K. Petersen --- drivers/target/target_core_transport.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/target/target_core_transport.c b/drivers/target/target_core_transport.c index 7e35eddd9eb700..26ceabe34de554 100644 --- a/drivers/target/target_core_transport.c +++ b/drivers/target/target_core_transport.c @@ -886,7 +886,7 @@ void target_complete_cmd(struct se_cmd *cmd, u8 scsi_status) INIT_WORK(&cmd->work, success ? target_complete_ok_work : target_complete_failure_work); - if (wwn->cmd_compl_affinity == SE_COMPL_AFFINITY_CPUID) + if (!wwn || wwn->cmd_compl_affinity == SE_COMPL_AFFINITY_CPUID) cpu = cmd->cpuid; else cpu = wwn->cmd_compl_affinity; From 235c3610d5f02ee91244239b43cd9ae8b4859dff Mon Sep 17 00:00:00 2001 From: Jason Ekstrand Date: Tue, 20 Jul 2021 13:13:55 -0500 Subject: [PATCH 584/714] drm/ttm: Force re-init if ttm_global_init() fails MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit If we have a failure, decrement the reference count so that the next call to ttm_global_init() will actually do something instead of assume everything is all set up. Signed-off-by: Jason Ekstrand Fixes: 62b53b37e4b1 ("drm/ttm: use a static ttm_bo_global instance") Reviewed-by: Christian König Link: https://patchwork.freedesktop.org/patch/msgid/20210720181357.2760720-5-jason@jlekstrand.net Signed-off-by: Christian König --- drivers/gpu/drm/ttm/ttm_device.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/gpu/drm/ttm/ttm_device.c b/drivers/gpu/drm/ttm/ttm_device.c index 5f31acec3ad76b..519deea8e39b76 100644 --- a/drivers/gpu/drm/ttm/ttm_device.c +++ b/drivers/gpu/drm/ttm/ttm_device.c @@ -100,6 +100,8 @@ static int ttm_global_init(void) debugfs_create_atomic_t("buffer_objects", 0444, ttm_debugfs_root, &glob->bo_count); out: + if (ret) + --ttm_glob_use_count; mutex_unlock(&ttm_global_mutex); return ret; } From 44cf53602f5a0db80d53c8fff6cdbcae59650a42 Mon Sep 17 00:00:00 2001 From: Moritz Fischer Date: Mon, 19 Jul 2021 00:05:19 -0700 Subject: [PATCH 585/714] Revert "usb: renesas-xhci: Fix handling of unknown ROM state" This reverts commit d143825baf15f204dac60acdf95e428182aa3374. Justin reports some of his systems now fail as result of this commit: xhci_hcd 0000:04:00.0: Direct firmware load for renesas_usb_fw.mem failed with error -2 xhci_hcd 0000:04:00.0: request_firmware failed: -2 xhci_hcd: probe of 0000:04:00.0 failed with error -2 The revert brings back the original issue the commit tried to solve but at least unbreaks existing systems relying on previous behavior. Cc: stable@vger.kernel.org Cc: Mathias Nyman Cc: Vinod Koul Cc: Justin Forbes Reported-by: Justin Forbes Signed-off-by: Moritz Fischer Fixes: d143825baf15 ("usb: renesas-xhci: Fix handling of unknown ROM state") Link: https://lore.kernel.org/r/20210719070519.41114-1-mdf@kernel.org Signed-off-by: Greg Kroah-Hartman --- drivers/usb/host/xhci-pci-renesas.c | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/drivers/usb/host/xhci-pci-renesas.c b/drivers/usb/host/xhci-pci-renesas.c index 1da647961c25ce..5923844ed8218d 100644 --- a/drivers/usb/host/xhci-pci-renesas.c +++ b/drivers/usb/host/xhci-pci-renesas.c @@ -207,8 +207,7 @@ static int renesas_check_rom_state(struct pci_dev *pdev) return 0; case RENESAS_ROM_STATUS_NO_RESULT: /* No result yet */ - dev_dbg(&pdev->dev, "Unknown ROM status ...\n"); - break; + return 0; case RENESAS_ROM_STATUS_ERROR: /* Error State */ default: /* All other states are marked as "Reserved states" */ @@ -225,12 +224,13 @@ static int renesas_fw_check_running(struct pci_dev *pdev) u8 fw_state; int err; - /* - * Only if device has ROM and loaded FW we can skip loading and - * return success. Otherwise (even unknown state), attempt to load FW. - */ - if (renesas_check_rom(pdev) && !renesas_check_rom_state(pdev)) - return 0; + /* Check if device has ROM and loaded, if so skip everything */ + err = renesas_check_rom(pdev); + if (err) { /* we have rom */ + err = renesas_check_rom_state(pdev); + if (!err) + return err; + } /* * Test if the device is actually needing the firmware. As most From 72f68bf5c756f5ce1139b31daae2684501383ad5 Mon Sep 17 00:00:00 2001 From: Mathias Nyman Date: Thu, 15 Jul 2021 18:06:51 +0300 Subject: [PATCH 586/714] xhci: Fix lost USB 2 remote wake There's a small window where a USB 2 remote wake may be left unhandled due to a race between hub thread and xhci port event interrupt handler. When the resume event is detected in the xhci interrupt handler it kicks the hub timer, which should move the port from resume to U0 once resume has been signalled for long enough. To keep the hub "thread" running we set a bus_state->resuming_ports flag. This flag makes sure hub timer function kicks itself. checking this flag was not properly protected by the spinlock. Flag was copied to a local variable before lock was taken. The local variable was then checked later with spinlock held. If interrupt is handled right after copying the flag to the local variable we end up stopping the hub thread before it can handle the USB 2 resume. CPU0 CPU1 (hub thread) (xhci event handler) xhci_hub_status_data() status = bus_state->resuming_ports; handle_port_status() spin_lock() bus_state->resuming_ports = 1 set_flag(HCD_FLAG_POLL_RH) spin_unlock() spin_lock() if (!status) clear_flag(HCD_FLAG_POLL_RH) spin_unlock() Fix this by taking the lock a bit earlier so that it covers the resuming_ports flag copy in the hub thread Cc: Signed-off-by: Mathias Nyman Link: https://lore.kernel.org/r/20210715150651.1996099-2-mathias.nyman@linux.intel.com Signed-off-by: Greg Kroah-Hartman --- drivers/usb/host/xhci-hub.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/usb/host/xhci-hub.c b/drivers/usb/host/xhci-hub.c index e9b18fc1761723..151e93c4bd5743 100644 --- a/drivers/usb/host/xhci-hub.c +++ b/drivers/usb/host/xhci-hub.c @@ -1638,11 +1638,12 @@ int xhci_hub_status_data(struct usb_hcd *hcd, char *buf) * Inform the usbcore about resume-in-progress by returning * a non-zero value even if there are no status changes. */ + spin_lock_irqsave(&xhci->lock, flags); + status = bus_state->resuming_ports; mask = PORT_CSC | PORT_PEC | PORT_OCC | PORT_PLC | PORT_WRC | PORT_CEC; - spin_lock_irqsave(&xhci->lock, flags); /* For each port, did anything change? If so, set that bit in buf. */ for (i = 0; i < max_ports; i++) { temp = readl(ports[i]->addr); From 57560ee95cb7f91cf0bc31d4ae8276e0dcfe17aa Mon Sep 17 00:00:00 2001 From: Martin Kepplinger Date: Wed, 14 Jul 2021 08:18:07 +0200 Subject: [PATCH 587/714] usb: typec: tipd: Don't block probing of consumer of "connector" nodes Similar as with tcpm this patch lets fw_devlink know not to wait on the fwnode to be populated as a struct device. Without this patch, USB functionality can be broken on some previously supported boards. Fixes: 28ec344bb891 ("usb: typec: tcpm: Don't block probing of consumers of "connector" nodes") Cc: stable Acked-by: Heikki Krogerus Signed-off-by: Martin Kepplinger Link: https://lore.kernel.org/r/20210714061807.5737-1-martin.kepplinger@puri.sm Signed-off-by: Greg Kroah-Hartman --- drivers/usb/typec/tipd/core.c | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/drivers/usb/typec/tipd/core.c b/drivers/usb/typec/tipd/core.c index 938219bc1b4bea..21b3ae25c76d2c 100644 --- a/drivers/usb/typec/tipd/core.c +++ b/drivers/usb/typec/tipd/core.c @@ -629,6 +629,15 @@ static int tps6598x_probe(struct i2c_client *client) if (!fwnode) return -ENODEV; + /* + * This fwnode has a "compatible" property, but is never populated as a + * struct device. Instead we simply parse it to read the properties. + * This breaks fw_devlink=on. To maintain backward compatibility + * with existing DT files, we work around this by deleting any + * fwnode_links to/from this fwnode. + */ + fw_devlink_purge_absent_suppliers(fwnode); + tps->role_sw = fwnode_usb_role_switch_get(fwnode); if (IS_ERR(tps->role_sw)) { ret = PTR_ERR(tps->role_sw); From 1bf2761c837571a66ec290fb66c90413821ffda2 Mon Sep 17 00:00:00 2001 From: Mathias Nyman Date: Thu, 15 Jul 2021 18:01:21 +0300 Subject: [PATCH 588/714] usb: hub: Fix link power management max exit latency (MEL) calculations Maximum Exit Latency (MEL) value is used by host to know how much in advance it needs to start waking up a U1/U2 suspended link in order to service a periodic transfer in time. Current MEL calculation only includes the time to wake up the path from U1/U2 to U0. This is called tMEL1 in USB 3.1 section C 1.5.2 Total MEL = tMEL1 + tMEL2 +tMEL3 + tMEL4 which should additinally include: - tMEL2 which is the time it takes for PING message to reach device - tMEL3 time for device to process the PING and submit a PING_RESPONSE - tMEL4 time for PING_RESPONSE to traverse back upstream to host. Add the missing tMEL2, tMEL3 and tMEL4 to MEL calculation. Cc: # v3.5 Signed-off-by: Mathias Nyman Link: https://lore.kernel.org/r/20210715150122.1995966-1-mathias.nyman@linux.intel.com Signed-off-by: Greg Kroah-Hartman --- drivers/usb/core/hub.c | 52 +++++++++++++++++++++++------------------- 1 file changed, 28 insertions(+), 24 deletions(-) diff --git a/drivers/usb/core/hub.c b/drivers/usb/core/hub.c index d1efc714133376..a35d0bedafa3c6 100644 --- a/drivers/usb/core/hub.c +++ b/drivers/usb/core/hub.c @@ -48,6 +48,7 @@ #define USB_TP_TRANSMISSION_DELAY 40 /* ns */ #define USB_TP_TRANSMISSION_DELAY_MAX 65535 /* ns */ +#define USB_PING_RESPONSE_TIME 400 /* ns */ /* Protect struct usb_device->state and ->children members * Note: Both are also protected by ->dev.sem, except that ->state can @@ -182,8 +183,9 @@ int usb_device_supports_lpm(struct usb_device *udev) } /* - * Set the Maximum Exit Latency (MEL) for the host to initiate a transition from - * either U1 or U2. + * Set the Maximum Exit Latency (MEL) for the host to wakup up the path from + * U1/U2, send a PING to the device and receive a PING_RESPONSE. + * See USB 3.1 section C.1.5.2 */ static void usb_set_lpm_mel(struct usb_device *udev, struct usb3_lpm_parameters *udev_lpm_params, @@ -193,35 +195,37 @@ static void usb_set_lpm_mel(struct usb_device *udev, unsigned int hub_exit_latency) { unsigned int total_mel; - unsigned int device_mel; - unsigned int hub_mel; /* - * Calculate the time it takes to transition all links from the roothub - * to the parent hub into U0. The parent hub must then decode the - * packet (hub header decode latency) to figure out which port it was - * bound for. - * - * The Hub Header decode latency is expressed in 0.1us intervals (0x1 - * means 0.1us). Multiply that by 100 to get nanoseconds. + * tMEL1. time to transition path from host to device into U0. + * MEL for parent already contains the delay up to parent, so only add + * the exit latency for the last link (pick the slower exit latency), + * and the hub header decode latency. See USB 3.1 section C 2.2.1 + * Store MEL in nanoseconds */ total_mel = hub_lpm_params->mel + - (hub->descriptor->u.ss.bHubHdrDecLat * 100); + max(udev_exit_latency, hub_exit_latency) * 1000 + + hub->descriptor->u.ss.bHubHdrDecLat * 100; /* - * How long will it take to transition the downstream hub's port into - * U0? The greater of either the hub exit latency or the device exit - * latency. - * - * The BOS U1/U2 exit latencies are expressed in 1us intervals. - * Multiply that by 1000 to get nanoseconds. + * tMEL2. Time to submit PING packet. Sum of tTPTransmissionDelay for + * each link + wHubDelay for each hub. Add only for last link. + * tMEL4, the time for PING_RESPONSE to traverse upstream is similar. + * Multiply by 2 to include it as well. */ - device_mel = udev_exit_latency * 1000; - hub_mel = hub_exit_latency * 1000; - if (device_mel > hub_mel) - total_mel += device_mel; - else - total_mel += hub_mel; + total_mel += (__le16_to_cpu(hub->descriptor->u.ss.wHubDelay) + + USB_TP_TRANSMISSION_DELAY) * 2; + + /* + * tMEL3, tPingResponse. Time taken by device to generate PING_RESPONSE + * after receiving PING. Also add 2100ns as stated in USB 3.1 C 1.5.2.4 + * to cover the delay if the PING_RESPONSE is queued behind a Max Packet + * Size DP. + * Note these delays should be added only once for the entire path, so + * add them to the MEL of the device connected to the roothub. + */ + if (!hub->hdev->parent) + total_mel += USB_PING_RESPONSE_TIME + 2100; udev_lpm_params->mel = total_mel; } From 1b7f56fbc7a1b66967b6114d1b5f5a257c3abae6 Mon Sep 17 00:00:00 2001 From: Mathias Nyman Date: Thu, 15 Jul 2021 18:01:22 +0300 Subject: [PATCH 589/714] usb: hub: Disable USB 3 device initiated lpm if exit latency is too high The device initiated link power management U1/U2 states should not be enabled in case the system exit latency plus one bus interval (125us) is greater than the shortest service interval of any periodic endpoint. This is the case for both U1 and U2 sytstem exit latencies and link states. See USB 3.2 section 9.4.9 "Set Feature" for more details Note, before this patch the host and device initiated U1/U2 lpm states were both enabled with lpm. After this patch it's possible to end up with only host inititated U1/U2 lpm in case the exit latencies won't allow device initiated lpm. If this case we still want to set the udev->usb3_lpm_ux_enabled flag so that sysfs users can see the link may go to U1/U2. Signed-off-by: Mathias Nyman Cc: stable Link: https://lore.kernel.org/r/20210715150122.1995966-2-mathias.nyman@linux.intel.com Signed-off-by: Greg Kroah-Hartman --- drivers/usb/core/hub.c | 68 ++++++++++++++++++++++++++++++++++-------- 1 file changed, 56 insertions(+), 12 deletions(-) diff --git a/drivers/usb/core/hub.c b/drivers/usb/core/hub.c index a35d0bedafa3c6..86658a81d28445 100644 --- a/drivers/usb/core/hub.c +++ b/drivers/usb/core/hub.c @@ -4116,6 +4116,47 @@ static int usb_set_lpm_timeout(struct usb_device *udev, return 0; } +/* + * Don't allow device intiated U1/U2 if the system exit latency + one bus + * interval is greater than the minimum service interval of any active + * periodic endpoint. See USB 3.2 section 9.4.9 + */ +static bool usb_device_may_initiate_lpm(struct usb_device *udev, + enum usb3_link_state state) +{ + unsigned int sel; /* us */ + int i, j; + + if (state == USB3_LPM_U1) + sel = DIV_ROUND_UP(udev->u1_params.sel, 1000); + else if (state == USB3_LPM_U2) + sel = DIV_ROUND_UP(udev->u2_params.sel, 1000); + else + return false; + + for (i = 0; i < udev->actconfig->desc.bNumInterfaces; i++) { + struct usb_interface *intf; + struct usb_endpoint_descriptor *desc; + unsigned int interval; + + intf = udev->actconfig->interface[i]; + if (!intf) + continue; + + for (j = 0; j < intf->cur_altsetting->desc.bNumEndpoints; j++) { + desc = &intf->cur_altsetting->endpoint[j].desc; + + if (usb_endpoint_xfer_int(desc) || + usb_endpoint_xfer_isoc(desc)) { + interval = (1 << (desc->bInterval - 1)) * 125; + if (sel + 125 > interval) + return false; + } + } + } + return true; +} + /* * Enable the hub-initiated U1/U2 idle timeouts, and enable device-initiated * U1/U2 entry. @@ -4188,20 +4229,23 @@ static void usb_enable_link_state(struct usb_hcd *hcd, struct usb_device *udev, * U1/U2_ENABLE */ if (udev->actconfig && - usb_set_device_initiated_lpm(udev, state, true) == 0) { - if (state == USB3_LPM_U1) - udev->usb3_lpm_u1_enabled = 1; - else if (state == USB3_LPM_U2) - udev->usb3_lpm_u2_enabled = 1; - } else { - /* Don't request U1/U2 entry if the device - * cannot transition to U1/U2. - */ - usb_set_lpm_timeout(udev, state, 0); - hcd->driver->disable_usb3_lpm_timeout(hcd, udev, state); + usb_device_may_initiate_lpm(udev, state)) { + if (usb_set_device_initiated_lpm(udev, state, true)) { + /* + * Request to enable device initiated U1/U2 failed, + * better to turn off lpm in this case. + */ + usb_set_lpm_timeout(udev, state, 0); + hcd->driver->disable_usb3_lpm_timeout(hcd, udev, state); + return; + } } -} + if (state == USB3_LPM_U1) + udev->usb3_lpm_u1_enabled = 1; + else if (state == USB3_LPM_U2) + udev->usb3_lpm_u2_enabled = 1; +} /* * Disable the hub-initiated U1/U2 idle timeouts, and disable device-initiated * U1/U2 entry. From 0b60557230adfdeb8164e0b342ac9cd469a75759 Mon Sep 17 00:00:00 2001 From: David Jeffery Date: Thu, 15 Jul 2021 17:37:44 -0400 Subject: [PATCH 590/714] usb: ehci: Prevent missed ehci interrupts with edge-triggered MSI When MSI is used by the ehci-hcd driver, it can cause lost interrupts which results in EHCI only continuing to work due to a polling fallback. But the reliance of polling drastically reduces performance of any I/O through EHCI. Interrupts are lost as the EHCI interrupt handler does not safely handle edge-triggered interrupts. It fails to ensure all interrupt status bits are cleared, which works with level-triggered interrupts but not the edge-triggered interrupts typical from using MSI. To fix this problem, check if the driver may have raced with the hardware setting additional interrupt status bits and clear status until it is in a stable state. Fixes: 306c54d0edb6 ("usb: hcd: Try MSI interrupts on PCI devices") Tested-by: Laurence Oberman Reviewed-by: Andy Shevchenko Acked-by: Alan Stern Signed-off-by: David Jeffery Link: https://lore.kernel.org/r/20210715213744.GA44506@redhat Cc: stable Signed-off-by: Greg Kroah-Hartman --- drivers/usb/host/ehci-hcd.c | 18 ++++++++++++++---- 1 file changed, 14 insertions(+), 4 deletions(-) diff --git a/drivers/usb/host/ehci-hcd.c b/drivers/usb/host/ehci-hcd.c index 36f5bf6a075231..10b0365f34399a 100644 --- a/drivers/usb/host/ehci-hcd.c +++ b/drivers/usb/host/ehci-hcd.c @@ -703,24 +703,28 @@ EXPORT_SYMBOL_GPL(ehci_setup); static irqreturn_t ehci_irq (struct usb_hcd *hcd) { struct ehci_hcd *ehci = hcd_to_ehci (hcd); - u32 status, masked_status, pcd_status = 0, cmd; + u32 status, current_status, masked_status, pcd_status = 0; + u32 cmd; int bh; spin_lock(&ehci->lock); - status = ehci_readl(ehci, &ehci->regs->status); + status = 0; + current_status = ehci_readl(ehci, &ehci->regs->status); +restart: /* e.g. cardbus physical eject */ - if (status == ~(u32) 0) { + if (current_status == ~(u32) 0) { ehci_dbg (ehci, "device removed\n"); goto dead; } + status |= current_status; /* * We don't use STS_FLR, but some controllers don't like it to * remain on, so mask it out along with the other status bits. */ - masked_status = status & (INTR_MASK | STS_FLR); + masked_status = current_status & (INTR_MASK | STS_FLR); /* Shared IRQ? */ if (!masked_status || unlikely(ehci->rh_state == EHCI_RH_HALTED)) { @@ -730,6 +734,12 @@ static irqreturn_t ehci_irq (struct usb_hcd *hcd) /* clear (just) interrupts */ ehci_writel(ehci, masked_status, &ehci->regs->status); + + /* For edge interrupts, don't race with an interrupt bit being raised */ + current_status = ehci_readl(ehci, &ehci->regs->status); + if (current_status & INTR_MASK) + goto restart; + cmd = ehci_readl(ehci, &ehci->regs->command); bh = 0; From 6abf2fe6b4bf6e5256b80c5817908151d2d33e9f Mon Sep 17 00:00:00 2001 From: Julian Sikorski Date: Tue, 20 Jul 2021 19:19:10 +0200 Subject: [PATCH 591/714] USB: usb-storage: Add LaCie Rugged USB3-FW to IGNORE_UAS LaCie Rugged USB3-FW appears to be incompatible with UAS. It generates errors like: [ 1151.582598] sd 14:0:0:0: tag#16 uas_eh_abort_handler 0 uas-tag 1 inflight: IN [ 1151.582602] sd 14:0:0:0: tag#16 CDB: Report supported operation codes a3 0c 01 12 00 00 00 00 02 00 00 00 [ 1151.588594] scsi host14: uas_eh_device_reset_handler start [ 1151.710482] usb 2-4: reset SuperSpeed Gen 1 USB device number 2 using xhci_hcd [ 1151.741398] scsi host14: uas_eh_device_reset_handler success [ 1181.785534] scsi host14: uas_eh_device_reset_handler start Signed-off-by: Julian Sikorski Cc: stable Link: https://lore.kernel.org/r/20210720171910.36497-1-belegdol+github@gmail.com Signed-off-by: Greg Kroah-Hartman --- drivers/usb/storage/unusual_uas.h | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/drivers/usb/storage/unusual_uas.h b/drivers/usb/storage/unusual_uas.h index f9677a5ec31b28..c35a6db993f1b6 100644 --- a/drivers/usb/storage/unusual_uas.h +++ b/drivers/usb/storage/unusual_uas.h @@ -45,6 +45,13 @@ UNUSUAL_DEV(0x059f, 0x105f, 0x0000, 0x9999, USB_SC_DEVICE, USB_PR_DEVICE, NULL, US_FL_NO_REPORT_OPCODES | US_FL_NO_SAME), +/* Reported-by: Julian Sikorski */ +UNUSUAL_DEV(0x059f, 0x1061, 0x0000, 0x9999, + "LaCie", + "Rugged USB3-FW", + USB_SC_DEVICE, USB_PR_DEVICE, NULL, + US_FL_IGNORE_UAS), + /* * Apricorn USB3 dongle sometimes returns "USBSUSBSUSBS" in response to SCSI * commands in UAS mode. Observed with the 1.28 firmware; are there others? From 86762ad4abcc549deb7a155c8e5e961b9755bcf0 Mon Sep 17 00:00:00 2001 From: Amelie Delaunay Date: Fri, 16 Jul 2021 14:07:17 +0200 Subject: [PATCH 592/714] usb: typec: stusb160x: register role switch before interrupt registration During interrupt registration, attach state is checked. If attached, then the Type-C state is updated with typec_set_xxx functions and role switch is set with usb_role_switch_set_role(). If the usb_role_switch parameter is error or null, the function simply returns 0. So, to update usb_role_switch role if a device is attached before the irq is registered, usb_role_switch must be registered before irq registration. Fixes: da0cb6310094 ("usb: typec: add support for STUSB160x Type-C controller family") Cc: stable Signed-off-by: Amelie Delaunay Link: https://lore.kernel.org/r/20210716120718.20398-2-amelie.delaunay@foss.st.com Signed-off-by: Greg Kroah-Hartman --- drivers/usb/typec/stusb160x.c | 11 +++++++---- 1 file changed, 7 insertions(+), 4 deletions(-) diff --git a/drivers/usb/typec/stusb160x.c b/drivers/usb/typec/stusb160x.c index 6eaeba9b096e14..3d3848e7c2c2fe 100644 --- a/drivers/usb/typec/stusb160x.c +++ b/drivers/usb/typec/stusb160x.c @@ -739,10 +739,6 @@ static int stusb160x_probe(struct i2c_client *client) typec_set_pwr_opmode(chip->port, chip->pwr_opmode); if (client->irq) { - ret = stusb160x_irq_init(chip, client->irq); - if (ret) - goto port_unregister; - chip->role_sw = fwnode_usb_role_switch_get(fwnode); if (IS_ERR(chip->role_sw)) { ret = PTR_ERR(chip->role_sw); @@ -752,6 +748,10 @@ static int stusb160x_probe(struct i2c_client *client) ret); goto port_unregister; } + + ret = stusb160x_irq_init(chip, client->irq); + if (ret) + goto role_sw_put; } else { /* * If Source or Dual power role, need to enable VDD supply @@ -775,6 +775,9 @@ static int stusb160x_probe(struct i2c_client *client) return 0; +role_sw_put: + if (chip->role_sw) + usb_role_switch_put(chip->role_sw); port_unregister: typec_unregister_port(chip->port); all_reg_disable: From 6b63376722d9e1b915a2948e9b30f4ba2712e3f5 Mon Sep 17 00:00:00 2001 From: Amelie Delaunay Date: Fri, 16 Jul 2021 14:07:18 +0200 Subject: [PATCH 593/714] usb: typec: stusb160x: Don't block probing of consumer of "connector" nodes Similar as with tcpm this patch lets fw_devlink know not to wait on the fwnode to be populated as a struct device. Without this patch, USB functionality can be broken on some previously supported boards. Fixes: 28ec344bb891 ("usb: typec: tcpm: Don't block probing of consumers of "connector" nodes") Cc: stable Signed-off-by: Amelie Delaunay Link: https://lore.kernel.org/r/20210716120718.20398-3-amelie.delaunay@foss.st.com Signed-off-by: Greg Kroah-Hartman --- drivers/usb/typec/stusb160x.c | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/drivers/usb/typec/stusb160x.c b/drivers/usb/typec/stusb160x.c index 3d3848e7c2c2fe..e7745d1c2a5c46 100644 --- a/drivers/usb/typec/stusb160x.c +++ b/drivers/usb/typec/stusb160x.c @@ -685,6 +685,15 @@ static int stusb160x_probe(struct i2c_client *client) if (!fwnode) return -ENODEV; + /* + * This fwnode has a "compatible" property, but is never populated as a + * struct device. Instead we simply parse it to read the properties. + * This it breaks fw_devlink=on. To maintain backward compatibility + * with existing DT files, we work around this by deleting any + * fwnode_links to/from this fwnode. + */ + fw_devlink_purge_absent_suppliers(fwnode); + /* * When both VDD and VSYS power supplies are present, the low power * supply VSYS is selected when VSYS voltage is above 3.1 V. From a6b125621c081bef519fd78cf336de351390da3f Mon Sep 17 00:00:00 2001 From: Lukas Bulwahn Date: Thu, 1 Jul 2021 11:39:03 +0200 Subject: [PATCH 594/714] MAINTAINERS: repair reference in USB IP DRIVER FOR HISILICON KIRIN 970 Commit 8de6b7edd493 ("phy: phy-hi3670-usb3: move driver from staging into phy") moves phy-hi3670-usb3.c from ./drivers/staging/hikey9xx/ to ./drivers/phy/hisilicon/, but the new file entry in MAINTAINERS refers to ./drivers/phy/hisilicon/phy-kirin970-usb3.c. Hence, ./scripts/get_maintainer.pl --self-test=patterns complains: warning: no file matches F: drivers/phy/hisilicon/phy-kirin970-usb3.c Repair the file entry by referring to the right location. Fixes: 8de6b7edd493 ("phy: phy-hi3670-usb3: move driver from staging into phy") Acked-by: Mauro Carvalho Chehab Signed-off-by: Lukas Bulwahn Link: https://lore.kernel.org/r/20210701093903.28733-1-lukas.bulwahn@gmail.com Signed-off-by: Greg Kroah-Hartman --- MAINTAINERS | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/MAINTAINERS b/MAINTAINERS index a61f4f3b78a91f..6f86a58930bf1c 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -19114,7 +19114,7 @@ M: Mauro Carvalho Chehab L: linux-usb@vger.kernel.org S: Maintained F: Documentation/devicetree/bindings/phy/hisilicon,hi3670-usb3.yaml -F: drivers/phy/hisilicon/phy-kirin970-usb3.c +F: drivers/phy/hisilicon/phy-hi3670-usb3.c USB ISP116X DRIVER M: Olav Kongas From 5b01248156bd75303e66985c351dee648c149979 Mon Sep 17 00:00:00 2001 From: Zhang Qilong Date: Fri, 18 Jun 2021 22:14:41 +0800 Subject: [PATCH 595/714] usb: gadget: Fix Unbalanced pm_runtime_enable in tegra_xudc_probe Add missing pm_runtime_disable() when probe error out. It could avoid pm_runtime implementation complains when removing and probing again the driver. Fixes: 49db427232fe ("usb: gadget: Add UDC driver for tegra XUSB device mode controller") Cc: stable Signed-off-by: Zhang Qilong Link: https://lore.kernel.org/r/20210618141441.107817-1-zhangqilong3@huawei.com Signed-off-by: Greg Kroah-Hartman --- drivers/usb/gadget/udc/tegra-xudc.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/usb/gadget/udc/tegra-xudc.c b/drivers/usb/gadget/udc/tegra-xudc.c index a54d1cef17db8b..c0ca7144e5128f 100644 --- a/drivers/usb/gadget/udc/tegra-xudc.c +++ b/drivers/usb/gadget/udc/tegra-xudc.c @@ -3853,6 +3853,7 @@ static int tegra_xudc_probe(struct platform_device *pdev) return 0; free_eps: + pm_runtime_disable(&pdev->dev); tegra_xudc_free_eps(xudc); free_event_ring: tegra_xudc_free_event_ring(xudc); From b5fdf5c6e6bee35837e160c00ac89327bdad031b Mon Sep 17 00:00:00 2001 From: Mark Tomlinson Date: Fri, 25 Jun 2021 15:14:56 +1200 Subject: [PATCH 596/714] usb: max-3421: Prevent corruption of freed memory The MAX-3421 USB driver remembers the state of the USB toggles for a device/endpoint. To save SPI writes, this was only done when a new device/endpoint was being used. Unfortunately, if the old device was removed, this would cause writes to freed memory. To fix this, a simpler scheme is used. The toggles are read from hardware when a URB is completed, and the toggles are always written to hardware when any URB transaction is started. This will cause a few more SPI transactions, but no causes kernel panics. Fixes: 2d53139f3162 ("Add support for using a MAX3421E chip as a host driver.") Cc: stable Signed-off-by: Mark Tomlinson Link: https://lore.kernel.org/r/20210625031456.8632-1-mark.tomlinson@alliedtelesis.co.nz Signed-off-by: Greg Kroah-Hartman --- drivers/usb/host/max3421-hcd.c | 44 +++++++++++----------------------- 1 file changed, 14 insertions(+), 30 deletions(-) diff --git a/drivers/usb/host/max3421-hcd.c b/drivers/usb/host/max3421-hcd.c index e7a8e06098535c..59cc1bc7f12f55 100644 --- a/drivers/usb/host/max3421-hcd.c +++ b/drivers/usb/host/max3421-hcd.c @@ -153,8 +153,6 @@ struct max3421_hcd { */ struct urb *curr_urb; enum scheduling_pass sched_pass; - struct usb_device *loaded_dev; /* dev that's loaded into the chip */ - int loaded_epnum; /* epnum whose toggles are loaded */ int urb_done; /* > 0 -> no errors, < 0: errno */ size_t curr_len; u8 hien; @@ -492,39 +490,17 @@ max3421_set_speed(struct usb_hcd *hcd, struct usb_device *dev) * Caller must NOT hold HCD spinlock. */ static void -max3421_set_address(struct usb_hcd *hcd, struct usb_device *dev, int epnum, - int force_toggles) +max3421_set_address(struct usb_hcd *hcd, struct usb_device *dev, int epnum) { - struct max3421_hcd *max3421_hcd = hcd_to_max3421(hcd); - int old_epnum, same_ep, rcvtog, sndtog; - struct usb_device *old_dev; + int rcvtog, sndtog; u8 hctl; - old_dev = max3421_hcd->loaded_dev; - old_epnum = max3421_hcd->loaded_epnum; - - same_ep = (dev == old_dev && epnum == old_epnum); - if (same_ep && !force_toggles) - return; - - if (old_dev && !same_ep) { - /* save the old end-points toggles: */ - u8 hrsl = spi_rd8(hcd, MAX3421_REG_HRSL); - - rcvtog = (hrsl >> MAX3421_HRSL_RCVTOGRD_BIT) & 1; - sndtog = (hrsl >> MAX3421_HRSL_SNDTOGRD_BIT) & 1; - - /* no locking: HCD (i.e., we) own toggles, don't we? */ - usb_settoggle(old_dev, old_epnum, 0, rcvtog); - usb_settoggle(old_dev, old_epnum, 1, sndtog); - } /* setup new endpoint's toggle bits: */ rcvtog = usb_gettoggle(dev, epnum, 0); sndtog = usb_gettoggle(dev, epnum, 1); hctl = (BIT(rcvtog + MAX3421_HCTL_RCVTOG0_BIT) | BIT(sndtog + MAX3421_HCTL_SNDTOG0_BIT)); - max3421_hcd->loaded_epnum = epnum; spi_wr8(hcd, MAX3421_REG_HCTL, hctl); /* @@ -532,7 +508,6 @@ max3421_set_address(struct usb_hcd *hcd, struct usb_device *dev, int epnum, * address-assignment so it's best to just always load the * address whenever the end-point changed/was forced. */ - max3421_hcd->loaded_dev = dev; spi_wr8(hcd, MAX3421_REG_PERADDR, dev->devnum); } @@ -667,7 +642,7 @@ max3421_select_and_start_urb(struct usb_hcd *hcd) struct max3421_hcd *max3421_hcd = hcd_to_max3421(hcd); struct urb *urb, *curr_urb = NULL; struct max3421_ep *max3421_ep; - int epnum, force_toggles = 0; + int epnum; struct usb_host_endpoint *ep; struct list_head *pos; unsigned long flags; @@ -777,7 +752,6 @@ max3421_select_and_start_urb(struct usb_hcd *hcd) usb_settoggle(urb->dev, epnum, 0, 1); usb_settoggle(urb->dev, epnum, 1, 1); max3421_ep->pkt_state = PKT_STATE_SETUP; - force_toggles = 1; } else max3421_ep->pkt_state = PKT_STATE_TRANSFER; } @@ -785,7 +759,7 @@ max3421_select_and_start_urb(struct usb_hcd *hcd) spin_unlock_irqrestore(&max3421_hcd->lock, flags); max3421_ep->last_active = max3421_hcd->frame_number; - max3421_set_address(hcd, urb->dev, epnum, force_toggles); + max3421_set_address(hcd, urb->dev, epnum); max3421_set_speed(hcd, urb->dev); max3421_next_transfer(hcd, 0); return 1; @@ -1379,6 +1353,16 @@ max3421_urb_done(struct usb_hcd *hcd) status = 0; urb = max3421_hcd->curr_urb; if (urb) { + /* save the old end-points toggles: */ + u8 hrsl = spi_rd8(hcd, MAX3421_REG_HRSL); + int rcvtog = (hrsl >> MAX3421_HRSL_RCVTOGRD_BIT) & 1; + int sndtog = (hrsl >> MAX3421_HRSL_SNDTOGRD_BIT) & 1; + int epnum = usb_endpoint_num(&urb->ep->desc); + + /* no locking: HCD (i.e., we) own toggles, don't we? */ + usb_settoggle(urb->dev, epnum, 0, rcvtog); + usb_settoggle(urb->dev, epnum, 1, sndtog); + max3421_hcd->curr_urb = NULL; spin_lock_irqsave(&max3421_hcd->lock, flags); usb_hcd_unlink_urb_from_ep(hcd, urb); From 40edb52298df4c1dbbdb30b19e3ce92cf612a918 Mon Sep 17 00:00:00 2001 From: Linyu Yuan Date: Tue, 29 Jun 2021 09:51:18 +0800 Subject: [PATCH 597/714] usb: dwc3: avoid NULL access of usb_gadget_driver we found crash in dwc3_disconnect_gadget(), it is because dwc->gadget_driver become NULL before async access. 7dc0c55e9f30 ('USB: UDC core: Add udc_async_callbacks gadget op') suggest a common way to avoid such kind of issue. this change implment the callback in dwc3 and change related functions which have callback to usb gadget driver. Acked-by: Alan Stern Signed-off-by: Linyu Yuan Link: https://lore.kernel.org/r/20210629015118.7944-1-linyyuan@codeaurora.org Signed-off-by: Greg Kroah-Hartman --- drivers/usb/dwc3/core.h | 1 + drivers/usb/dwc3/ep0.c | 10 ++++++---- drivers/usb/dwc3/gadget.c | 21 ++++++++++++++++----- 3 files changed, 23 insertions(+), 9 deletions(-) diff --git a/drivers/usb/dwc3/core.h b/drivers/usb/dwc3/core.h index dccdf13b5f9e2b..5991766239ba8b 100644 --- a/drivers/usb/dwc3/core.h +++ b/drivers/usb/dwc3/core.h @@ -1279,6 +1279,7 @@ struct dwc3 { unsigned dis_metastability_quirk:1; unsigned dis_split_quirk:1; + unsigned async_callbacks:1; u16 imod_interval; }; diff --git a/drivers/usb/dwc3/ep0.c b/drivers/usb/dwc3/ep0.c index 3cd29426437255..2f9e45eed22806 100644 --- a/drivers/usb/dwc3/ep0.c +++ b/drivers/usb/dwc3/ep0.c @@ -597,11 +597,13 @@ static int dwc3_ep0_set_address(struct dwc3 *dwc, struct usb_ctrlrequest *ctrl) static int dwc3_ep0_delegate_req(struct dwc3 *dwc, struct usb_ctrlrequest *ctrl) { - int ret; + int ret = -EINVAL; - spin_unlock(&dwc->lock); - ret = dwc->gadget_driver->setup(dwc->gadget, ctrl); - spin_lock(&dwc->lock); + if (dwc->async_callbacks) { + spin_unlock(&dwc->lock); + ret = dwc->gadget_driver->setup(dwc->gadget, ctrl); + spin_lock(&dwc->lock); + } return ret; } diff --git a/drivers/usb/dwc3/gadget.c b/drivers/usb/dwc3/gadget.c index af6d7f157989da..45f2bc0807e8ce 100644 --- a/drivers/usb/dwc3/gadget.c +++ b/drivers/usb/dwc3/gadget.c @@ -2585,6 +2585,16 @@ static int dwc3_gadget_vbus_draw(struct usb_gadget *g, unsigned int mA) return ret; } +static void dwc3_gadget_async_callbacks(struct usb_gadget *g, bool enable) +{ + struct dwc3 *dwc = gadget_to_dwc(g); + unsigned long flags; + + spin_lock_irqsave(&dwc->lock, flags); + dwc->async_callbacks = enable; + spin_unlock_irqrestore(&dwc->lock, flags); +} + static const struct usb_gadget_ops dwc3_gadget_ops = { .get_frame = dwc3_gadget_get_frame, .wakeup = dwc3_gadget_wakeup, @@ -2596,6 +2606,7 @@ static const struct usb_gadget_ops dwc3_gadget_ops = { .udc_set_ssp_rate = dwc3_gadget_set_ssp_rate, .get_config_params = dwc3_gadget_config_params, .vbus_draw = dwc3_gadget_vbus_draw, + .udc_async_callbacks = dwc3_gadget_async_callbacks, }; /* -------------------------------------------------------------------------- */ @@ -3231,7 +3242,7 @@ static void dwc3_endpoint_interrupt(struct dwc3 *dwc, static void dwc3_disconnect_gadget(struct dwc3 *dwc) { - if (dwc->gadget_driver && dwc->gadget_driver->disconnect) { + if (dwc->async_callbacks && dwc->gadget_driver->disconnect) { spin_unlock(&dwc->lock); dwc->gadget_driver->disconnect(dwc->gadget); spin_lock(&dwc->lock); @@ -3240,7 +3251,7 @@ static void dwc3_disconnect_gadget(struct dwc3 *dwc) static void dwc3_suspend_gadget(struct dwc3 *dwc) { - if (dwc->gadget_driver && dwc->gadget_driver->suspend) { + if (dwc->async_callbacks && dwc->gadget_driver->suspend) { spin_unlock(&dwc->lock); dwc->gadget_driver->suspend(dwc->gadget); spin_lock(&dwc->lock); @@ -3249,7 +3260,7 @@ static void dwc3_suspend_gadget(struct dwc3 *dwc) static void dwc3_resume_gadget(struct dwc3 *dwc) { - if (dwc->gadget_driver && dwc->gadget_driver->resume) { + if (dwc->async_callbacks && dwc->gadget_driver->resume) { spin_unlock(&dwc->lock); dwc->gadget_driver->resume(dwc->gadget); spin_lock(&dwc->lock); @@ -3261,7 +3272,7 @@ static void dwc3_reset_gadget(struct dwc3 *dwc) if (!dwc->gadget_driver) return; - if (dwc->gadget->speed != USB_SPEED_UNKNOWN) { + if (dwc->async_callbacks && dwc->gadget->speed != USB_SPEED_UNKNOWN) { spin_unlock(&dwc->lock); usb_gadget_udc_reset(dwc->gadget, dwc->gadget_driver); spin_lock(&dwc->lock); @@ -3585,7 +3596,7 @@ static void dwc3_gadget_wakeup_interrupt(struct dwc3 *dwc) * implemented. */ - if (dwc->gadget_driver && dwc->gadget_driver->resume) { + if (dwc->async_callbacks && dwc->gadget_driver->resume) { spin_unlock(&dwc->lock); dwc->gadget_driver->resume(dwc->gadget); spin_lock(&dwc->lock); From 4bb233b7ba87785c7ac519863f51ba61f4dbc459 Mon Sep 17 00:00:00 2001 From: Colin Ian King Date: Thu, 1 Jul 2021 15:43:05 +0100 Subject: [PATCH 598/714] usb: gadget: u_serial: remove WARN_ON on null port Loading and then unloading module g_dpgp on a VM that does not support the driver currently throws a WARN_ON message because the port has not been initialized. Removing an unused driver is a valid use-case and the WARN_ON kernel warning is a bit excessive, so remove it. Cleans up: [27654.638698] ------------[ cut here ]------------ [27654.638705] WARNING: CPU: 6 PID: 2956336 at drivers/usb/gadget/function/u_serial.c:1201 gserial_free_line+0x7c/0x90 [u_serial] [27654.638728] Modules linked in: g_dbgp(-) u_serial usb_f_tcm target_core_mod libcomposite udc_core vmw_vmci mcb i2c_nforce2 i2c_amd756 nfit cx8800 videobuf2_dma_sg videobuf2_memops videobuf2_v4l2 cx88xx tveeprom videobuf2_common videodev mc ccp hid_generic hid intel_ishtp cros_ec mc13xxx_core vfio_mdev mdev i915 i2c_algo_bit kvm ppdev parport zatm eni suni uPD98402 atm rio_scan binder_linux hwmon_vid video ipmi_devintf ipmi_msghandler zstd nls_utf8 decnet qrtr ns sctp ip6_udp_tunnel udp_tunnel fcrypt pcbc nhc_udp nhc_ipv6 nhc_routing nhc_mobility nhc_hop nhc_dest nhc_fragment 6lowpan ts_kmp dccp_ipv6 dccp_ipv4 dccp snd_seq_midi snd_seq_midi_event snd_rawmidi snd_seq_dummy snd_seq snd_seq_device xen_front_pgdir_shbuf binfmt_misc nls_iso8859_1 dm_multipath scsi_dh_rdac scsi_dh_emc scsi_dh_alua intel_rapl_msr intel_rapl_common snd_hda_codec_generic ledtrig_audio snd_hda_codec snd_hda_core snd_hwdep snd_pcm snd_timer snd rapl soundcore joydev input_leds mac_hid serio_raw efi_pstore [27654.638880] qemu_fw_cfg sch_fq_codel msr virtio_rng autofs4 btrfs blake2b_generic zstd_compress raid10 raid456 async_raid6_recov async_memcpy async_pq async_xor async_tx xor raid6_pq libcrc32c raid1 raid0 multipath linear qxl drm_ttm_helper crct10dif_pclmul ttm drm_kms_helper syscopyarea sysfillrect sysimgblt virtio_net fb_sys_fops cec net_failover rc_core ahci psmouse drm libahci lpc_ich virtio_blk failover [last unloaded: u_ether] [27654.638949] CPU: 6 PID: 2956336 Comm: modprobe Tainted: P O 5.13.0-9-generic #9 [27654.638956] Hardware name: QEMU Standard PC (Q35 + ICH9, 2009), BIOS 0.0.0 02/06/2015 [27654.638969] RIP: 0010:gserial_free_line+0x7c/0x90 [u_serial] [27654.638981] Code: 20 00 00 00 00 e8 74 1a ba c9 4c 89 e7 e8 8c fe ff ff 48 8b 3d 75 3b 00 00 44 89 f6 e8 3d 7c 69 c9 5b 41 5c 41 5d 41 5e 5d c3 <0f> 0b 4c 89 ef e8 4a 1a ba c9 5b 41 5c 41 5d 41 5e 5d c3 90 0f 1f [27654.638986] RSP: 0018:ffffba0b81403da0 EFLAGS: 00010246 [27654.638992] RAX: 0000000000000000 RBX: ffffffffc0eaf6a0 RCX: 0000000000000000 [27654.638996] RDX: ffff8e21c0cac8c0 RSI: 0000000000000006 RDI: ffffffffc0eaf6a0 [27654.639000] RBP: ffffba0b81403dc0 R08: ffffba0b81403de0 R09: fefefefefefefeff [27654.639003] R10: 0000000000000000 R11: 0000000000000000 R12: 0000000000000000 [27654.639006] R13: ffffffffc0eaf6a0 R14: 0000000000000000 R15: 0000000000000000 [27654.639010] FS: 00007faa1935e740(0000) GS:ffff8e223bd80000(0000) knlGS:0000000000000000 [27654.639015] CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 [27654.639019] CR2: 00007ffc840cd4e8 CR3: 000000000e1ac006 CR4: 0000000000370ee0 [27654.639028] DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000 [27654.639031] DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400 [27654.639035] Call Trace: [27654.639044] dbgp_exit+0x1c/0xa1a [g_dbgp] [27654.639054] __do_sys_delete_module.constprop.0+0x144/0x260 [27654.639066] ? call_rcu+0xe/0x10 [27654.639073] __x64_sys_delete_module+0x12/0x20 [27654.639081] do_syscall_64+0x61/0xb0 [27654.639092] ? exit_to_user_mode_loop+0xec/0x160 [27654.639098] ? exit_to_user_mode_prepare+0x37/0xb0 [27654.639104] ? syscall_exit_to_user_mode+0x27/0x50 [27654.639110] ? __x64_sys_close+0x12/0x40 [27654.639119] ? do_syscall_64+0x6e/0xb0 [27654.639126] ? exit_to_user_mode_prepare+0x37/0xb0 [27654.639132] ? syscall_exit_to_user_mode+0x27/0x50 [27654.639137] ? __x64_sys_newfstatat+0x1e/0x20 [27654.639146] ? do_syscall_64+0x6e/0xb0 [27654.639154] ? exc_page_fault+0x8f/0x170 [27654.639159] ? asm_exc_page_fault+0x8/0x30 [27654.639166] entry_SYSCALL_64_after_hwframe+0x44/0xae [27654.639173] RIP: 0033:0x7faa194a4b2b [27654.639179] Code: 73 01 c3 48 8b 0d 3d 73 0c 00 f7 d8 64 89 01 48 83 c8 ff c3 66 2e 0f 1f 84 00 00 00 00 00 90 f3 0f 1e fa b8 b0 00 00 00 0f 05 <48> 3d 01 f0 ff ff 73 01 c3 48 8b 0d 0d 73 0c 00 f7 d8 64 89 01 48 [27654.639185] RSP: 002b:00007ffc840d0578 EFLAGS: 00000206 ORIG_RAX: 00000000000000b0 [27654.639191] RAX: ffffffffffffffda RBX: 000056060f9f4e70 RCX: 00007faa194a4b2b [27654.639194] RDX: 0000000000000000 RSI: 0000000000000800 RDI: 000056060f9f4ed8 [27654.639197] RBP: 000056060f9f4e70 R08: 0000000000000000 R09: 0000000000000000 [27654.639200] R10: 00007faa1951eac0 R11: 0000000000000206 R12: 000056060f9f4ed8 [27654.639203] R13: 0000000000000000 R14: 000056060f9f4ed8 R15: 00007ffc840d06c8 [27654.639219] ---[ end trace 8dd0ea0bb32ce94a ]--- Signed-off-by: Colin Ian King Link: https://lore.kernel.org/r/20210701144305.110078-1-colin.king@canonical.com Signed-off-by: Greg Kroah-Hartman --- drivers/usb/gadget/function/u_serial.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/usb/gadget/function/u_serial.c b/drivers/usb/gadget/function/u_serial.c index bffef8e47daca8..281ca766698af2 100644 --- a/drivers/usb/gadget/function/u_serial.c +++ b/drivers/usb/gadget/function/u_serial.c @@ -1198,7 +1198,7 @@ void gserial_free_line(unsigned char port_num) struct gs_port *port; mutex_lock(&ports[port_num].lock); - if (WARN_ON(!ports[port_num].port)) { + if (!ports[port_num].port) { mutex_unlock(&ports[port_num].lock); return; } From 0665e387318607d8269bfdea60723c627c8bae43 Mon Sep 17 00:00:00 2001 From: Greg Thelen Date: Fri, 2 Jul 2021 00:12:24 -0700 Subject: [PATCH 599/714] usb: xhci: avoid renesas_usb_fw.mem when it's unusable Commit a66d21d7dba8 ("usb: xhci: Add support for Renesas controller with memory") added renesas_usb_fw.mem firmware reference to xhci-pci. Thus modinfo indicates xhci-pci.ko has "firmware: renesas_usb_fw.mem". But the firmware is only actually used with CONFIG_USB_XHCI_PCI_RENESAS. An unusable firmware reference can trigger safety checkers which look for drivers with unmet firmware dependencies. Avoid referring to renesas_usb_fw.mem in circumstances when it cannot be loaded (when CONFIG_USB_XHCI_PCI_RENESAS isn't set). Fixes: a66d21d7dba8 ("usb: xhci: Add support for Renesas controller with memory") Cc: stable Signed-off-by: Greg Thelen Link: https://lore.kernel.org/r/20210702071224.3673568-1-gthelen@google.com Signed-off-by: Greg Kroah-Hartman --- drivers/usb/host/xhci-pci.c | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/drivers/usb/host/xhci-pci.c b/drivers/usb/host/xhci-pci.c index 18c2bbddf080b9..1c9a7957c45c5d 100644 --- a/drivers/usb/host/xhci-pci.c +++ b/drivers/usb/host/xhci-pci.c @@ -636,7 +636,14 @@ static const struct pci_device_id pci_ids[] = { { /* end: all zeroes */ } }; MODULE_DEVICE_TABLE(pci, pci_ids); + +/* + * Without CONFIG_USB_XHCI_PCI_RENESAS renesas_xhci_check_request_fw() won't + * load firmware, so don't encumber the xhci-pci driver with it. + */ +#if IS_ENABLED(CONFIG_USB_XHCI_PCI_RENESAS) MODULE_FIRMWARE("renesas_usb_fw.mem"); +#endif /* pci driver glue; this is a "new style" PCI driver module */ static struct pci_driver xhci_pci_driver = { From 3d11de2d57b92e943767d7d070b0df9b18089d56 Mon Sep 17 00:00:00 2001 From: Artur Petrosyan Date: Sat, 10 Jul 2021 13:22:46 +0400 Subject: [PATCH 600/714] usb: phy: Fix page fault from usb_phy_uevent When the dwc2 platform device is removed, it unregisters the generic phy. usb_remove_phy() is called and the dwc2 usb_phy is removed from the "phy_list", but the uevent may still attempt to get the usb_phy from the list, resulting in a page fault bug. Currently we can't access the usb_phy from the "phy_list" after the device is removed. As a fix check to make sure that we can get the usb_phy before moving forward with the uevent. [ 84.949345] BUG: unable to handle page fault for address:00000007935688d8 [ 84.949349] #PF: supervisor read access in kernel mode [ 84.949351] #PF: error_code(0x0000) - not-present page [ 84.949353] PGD 0 P4D 0 [ 84.949356] Oops: 0000 [#1] SMP PTI [ 84.949360] CPU: 2 PID: 2081 Comm: rmmod Not tainted 5.13.0-rc4-snps-16547-ga8534cb092d7-dirty #32 [ 84.949363] Hardware name: Hewlett-Packard HP Z400 Workstation/0B4Ch, BIOS 786G3 v03.54 11/02/2011 [ 84.949365] RIP: 0010:usb_phy_uevent+0x99/0x121 [ 84.949372] Code: 8d 83 f8 00 00 00 48 3d b0 12 22 94 74 05 4c 3b 23 75 5b 8b 83 9c 00 00 00 be 32 00 00 00 48 8d 7c 24 04 48 c7 c2 d4 5d 7b 93 <48> 8b 0c c5 e0 88 56 93 e8 0f 63 8a ff 8b 83 98 00 00 00 be 32 00 [ 84.949375] RSP: 0018:ffffa46bc0f2fc70 EFLAGS: 00010246 [ 84.949378] RAX: 00000000ffffffff RBX: ffffffff942211b8 RCX: 0000000000000027 [ 84.949380] RDX: ffffffff937b5dd4 RSI: 0000000000000032 RDI: ffffa46bc0f2fc74 [ 84.949383] RBP: ffff94a306613000 R08: 0000000000000000 R09: 00000000fffeffff [ 84.949385] R10: ffffa46bc0f2faa8 R11: ffffa46bc0f2faa0 R12: ffff94a30186d410 [ 84.949387] R13: ffff94a32d188a80 R14: ffff94a30029f960 R15: ffffffff93522dd0 [ 84.949389] FS: 00007efdbd417540(0000) GS:ffff94a513a80000(0000) knlGS:0000000000000000 [ 84.949392] CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 [ 84.949394] CR2: 00000007935688d8 CR3: 0000000165606000 CR4: 00000000000006e0 [ 84.949396] Call Trace: [ 84.949401] dev_uevent+0x190/0x1ad [ 84.949408] kobject_uevent_env+0x18e/0x46c [ 84.949414] device_release_driver_internal+0x17f/0x18e [ 84.949418] bus_remove_device+0xd3/0xe5 [ 84.949421] device_del+0x1c3/0x31d [ 84.949425] ? kobject_put+0x97/0xa8 [ 84.949428] platform_device_del+0x1c/0x63 [ 84.949432] platform_device_unregister+0xa/0x11 [ 84.949436] dwc2_pci_remove+0x1e/0x2c [dwc2_pci] [ 84.949440] pci_device_remove+0x31/0x81 [ 84.949445] device_release_driver_internal+0xea/0x18e [ 84.949448] driver_detach+0x68/0x72 [ 84.949450] bus_remove_driver+0x63/0x82 [ 84.949453] pci_unregister_driver+0x1a/0x75 [ 84.949457] __do_sys_delete_module+0x149/0x1e9 [ 84.949462] ? task_work_run+0x64/0x6e [ 84.949465] ? exit_to_user_mode_prepare+0xd4/0x10d [ 84.949471] do_syscall_64+0x5d/0x70 [ 84.949475] entry_SYSCALL_64_after_hwframe+0x44/0xae [ 84.949480] RIP: 0033:0x7efdbd563bcb [ 84.949482] Code: 73 01 c3 48 8b 0d c5 82 0c 00 f7 d8 64 89 01 48 83 c8 ff c3 66 2e 0f 1f 84 00 00 00 00 00 90 f3 0f 1e fa b8 b0 00 00 00 0f 05 <48> 3d 01 f0 ff ff 73 01 c3 48 8b 0d 95 82 0c 00 f7 d8 64 89 01 48 [ 84.949485] RSP: 002b:00007ffe944d7d98 EFLAGS: 00000206 ORIG_RAX: 00000000000000b0 [ 84.949489] RAX: ffffffffffffffda RBX: 00005651072eb700 RCX: 00007efdbd563bcb [ 84.949491] RDX: 000000000000000a RSI: 0000000000000800 RDI: 00005651072eb768 [ 84.949493] RBP: 00007ffe944d7df8 R08: 0000000000000000 R09: 0000000000000000 [ 84.949495] R10: 00007efdbd5dfac0 R11: 0000000000000206 R12: 00007ffe944d7fd0 [ 84.949497] R13: 00007ffe944d8610 R14: 00005651072eb2a0 R15: 00005651072eb700 [ 84.949500] Modules linked in: uas configfs dwc2_pci(-) phy_generic fuse crc32c_intel [last unloaded: udc_core] [ 84.949508] CR2: 00000007935688d8 [ 84.949510] ---[ end trace e40c871ca3e4dc9e ]--- [ 84.949512] RIP: 0010:usb_phy_uevent+0x99/0x121 Fixes: a8534cb092d7 ("usb: phy: introduce usb_phy device type with its own uevent handler") Reviewed-by: Peter Chen Signed-off-by: Artur Petrosyan Signed-off-by: Thinh Nguyen Link: https://lore.kernel.org/r/20210710092247.D7AFEA005D@mailhost.synopsys.com Signed-off-by: Greg Kroah-Hartman --- drivers/usb/phy/phy.c | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) diff --git a/drivers/usb/phy/phy.c b/drivers/usb/phy/phy.c index 83ed5089475a37..1b24492bb4e5f6 100644 --- a/drivers/usb/phy/phy.c +++ b/drivers/usb/phy/phy.c @@ -86,10 +86,10 @@ static struct usb_phy *__device_to_usb_phy(struct device *dev) list_for_each_entry(usb_phy, &phy_list, head) { if (usb_phy->dev == dev) - break; + return usb_phy; } - return usb_phy; + return NULL; } static void usb_phy_set_default_current(struct usb_phy *usb_phy) @@ -150,8 +150,14 @@ static int usb_phy_uevent(struct device *dev, struct kobj_uevent_env *env) struct usb_phy *usb_phy; char uchger_state[50] = { 0 }; char uchger_type[50] = { 0 }; + unsigned long flags; + spin_lock_irqsave(&phy_lock, flags); usb_phy = __device_to_usb_phy(dev); + spin_unlock_irqrestore(&phy_lock, flags); + + if (!usb_phy) + return -ENODEV; snprintf(uchger_state, ARRAY_SIZE(uchger_state), "USB_CHARGER_STATE=%s", usb_chger_state[usb_phy->chg_state]); From fecb3a171db425e5068b27231f8efe154bf72637 Mon Sep 17 00:00:00 2001 From: Minas Harutyunyan Date: Tue, 13 Jul 2021 09:32:55 +0400 Subject: [PATCH 601/714] usb: dwc2: gadget: Fix GOUTNAK flow for Slave mode. Because of dwc2_hsotg_ep_stop_xfr() function uses poll mode, first need to mask GINTSTS_GOUTNAKEFF interrupt. In Slave mode GINTSTS_GOUTNAKEFF interrupt will be aserted only after pop OUT NAK status packet from RxFIFO. In dwc2_hsotg_ep_sethalt() function before setting DCTL_SGOUTNAK need to unmask GOUTNAKEFF interrupt. Tested by USBCV CH9 and MSC tests set in Slave, BDMA and DDMA. All tests are passed. Fixes: a4f827714539a ("usb: dwc2: gadget: Disable enabled HW endpoint in dwc2_hsotg_ep_disable") Fixes: 6070636c4918c ("usb: dwc2: Fix Stalling a Non-Isochronous OUT EP") Cc: stable Signed-off-by: Minas Harutyunyan Link: https://lore.kernel.org/r/e17fad802bbcaf879e1ed6745030993abb93baf8.1626152924.git.Minas.Harutyunyan@synopsys.com Signed-off-by: Greg Kroah-Hartman --- drivers/usb/dwc2/gadget.c | 21 +++++++++++++++++++++ 1 file changed, 21 insertions(+) diff --git a/drivers/usb/dwc2/gadget.c b/drivers/usb/dwc2/gadget.c index c581ee41ac81b9..74d25019272f11 100644 --- a/drivers/usb/dwc2/gadget.c +++ b/drivers/usb/dwc2/gadget.c @@ -3900,9 +3900,27 @@ static void dwc2_hsotg_ep_stop_xfr(struct dwc2_hsotg *hsotg, __func__); } } else { + /* Mask GINTSTS_GOUTNAKEFF interrupt */ + dwc2_hsotg_disable_gsint(hsotg, GINTSTS_GOUTNAKEFF); + if (!(dwc2_readl(hsotg, GINTSTS) & GINTSTS_GOUTNAKEFF)) dwc2_set_bit(hsotg, DCTL, DCTL_SGOUTNAK); + if (!using_dma(hsotg)) { + /* Wait for GINTSTS_RXFLVL interrupt */ + if (dwc2_hsotg_wait_bit_set(hsotg, GINTSTS, + GINTSTS_RXFLVL, 100)) { + dev_warn(hsotg->dev, "%s: timeout GINTSTS.RXFLVL\n", + __func__); + } else { + /* + * Pop GLOBAL OUT NAK status packet from RxFIFO + * to assert GOUTNAKEFF interrupt + */ + dwc2_readl(hsotg, GRXSTSP); + } + } + /* Wait for global nak to take effect */ if (dwc2_hsotg_wait_bit_set(hsotg, GINTSTS, GINTSTS_GOUTNAKEFF, 100)) @@ -4348,6 +4366,9 @@ static int dwc2_hsotg_ep_sethalt(struct usb_ep *ep, int value, bool now) epctl = dwc2_readl(hs, epreg); if (value) { + /* Unmask GOUTNAKEFF interrupt */ + dwc2_hsotg_en_gsint(hs, GINTSTS_GOUTNAKEFF); + if (!(dwc2_readl(hs, GINTSTS) & GINTSTS_GOUTNAKEFF)) dwc2_set_bit(hs, DCTL, DCTL_SGOUTNAK); // STALL bit will be set in GOUTNAKEFF interrupt handler From 5719df243e118fb343725e8b2afb1637e1af1373 Mon Sep 17 00:00:00 2001 From: Yoshihiro Shimoda Date: Thu, 24 Jun 2021 21:20:39 +0900 Subject: [PATCH 602/714] usb: renesas_usbhs: Fix superfluous irqs happen after usb_pkt_pop() This driver has a potential issue which this driver is possible to cause superfluous irqs after usb_pkt_pop() is called. So, after the commit 3af32605289e ("usb: renesas_usbhs: fix error return code of usbhsf_pkt_handler()") had been applied, we could observe the following error happened when we used g_audio. renesas_usbhs e6590000.usb: irq_ready run_error 1 : -22 To fix the issue, disable the tx or rx interrupt in usb_pkt_pop(). Fixes: 2743e7f90dc0 ("usb: renesas_usbhs: fix the usb_pkt_pop()") Cc: # v4.4+ Signed-off-by: Yoshihiro Shimoda Link: https://lore.kernel.org/r/20210624122039.596528-1-yoshihiro.shimoda.uh@renesas.com Signed-off-by: Greg Kroah-Hartman --- drivers/usb/renesas_usbhs/fifo.c | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/drivers/usb/renesas_usbhs/fifo.c b/drivers/usb/renesas_usbhs/fifo.c index b5e7991dc7d9e5..a3c2b01ccf7b55 100644 --- a/drivers/usb/renesas_usbhs/fifo.c +++ b/drivers/usb/renesas_usbhs/fifo.c @@ -101,6 +101,8 @@ static struct dma_chan *usbhsf_dma_chan_get(struct usbhs_fifo *fifo, #define usbhsf_dma_map(p) __usbhsf_dma_map_ctrl(p, 1) #define usbhsf_dma_unmap(p) __usbhsf_dma_map_ctrl(p, 0) static int __usbhsf_dma_map_ctrl(struct usbhs_pkt *pkt, int map); +static void usbhsf_tx_irq_ctrl(struct usbhs_pipe *pipe, int enable); +static void usbhsf_rx_irq_ctrl(struct usbhs_pipe *pipe, int enable); struct usbhs_pkt *usbhs_pkt_pop(struct usbhs_pipe *pipe, struct usbhs_pkt *pkt) { struct usbhs_priv *priv = usbhs_pipe_to_priv(pipe); @@ -123,6 +125,11 @@ struct usbhs_pkt *usbhs_pkt_pop(struct usbhs_pipe *pipe, struct usbhs_pkt *pkt) if (chan) { dmaengine_terminate_all(chan); usbhsf_dma_unmap(pkt); + } else { + if (usbhs_pipe_is_dir_in(pipe)) + usbhsf_rx_irq_ctrl(pipe, 0); + else + usbhsf_tx_irq_ctrl(pipe, 0); } usbhs_pipe_clear_without_sequence(pipe, 0, 0); From c4a0f7a6ab5417eb6105b0e1d7e6e67f6ef7d4e5 Mon Sep 17 00:00:00 2001 From: Marek Szyprowski Date: Fri, 16 Jul 2021 07:01:27 +0200 Subject: [PATCH 603/714] usb: dwc2: Skip clock gating on Samsung SoCs Commit 0112b7ce68ea ("usb: dwc2: Update dwc2_handle_usb_suspend_intr function.") changed the way the driver handles power down modes in a such way that it uses clock gating when no other power down mode is available. This however doesn't work well on the DWC2 implementation used on the Samsung SoCs. When a clock gating is enabled, system hangs. It looks that the proper clock gating requires some additional glue code in the shared USB2 PHY and/or Samsung glue code for the DWC2. To restore driver operation on the Samsung SoCs simply skip enabling clock gating mode until one finds what is really needed to make it working reliably. Fixes: 0112b7ce68ea ("usb: dwc2: Update dwc2_handle_usb_suspend_intr function.") Cc: stable Acked-by: Krzysztof Kozlowski Signed-off-by: Marek Szyprowski Link: https://lore.kernel.org/r/20210716050127.4406-1-m.szyprowski@samsung.com Signed-off-by: Greg Kroah-Hartman --- drivers/usb/dwc2/core.h | 4 ++++ drivers/usb/dwc2/core_intr.c | 3 ++- drivers/usb/dwc2/hcd.c | 6 ++++-- drivers/usb/dwc2/params.c | 1 + 4 files changed, 11 insertions(+), 3 deletions(-) diff --git a/drivers/usb/dwc2/core.h b/drivers/usb/dwc2/core.h index ab6b815e0089ca..483de2bbfaabe3 100644 --- a/drivers/usb/dwc2/core.h +++ b/drivers/usb/dwc2/core.h @@ -383,6 +383,9 @@ enum dwc2_ep0_state { * 0 - No (default) * 1 - Partial power down * 2 - Hibernation + * @no_clock_gating: Specifies whether to avoid clock gating feature. + * 0 - No (use clock gating) + * 1 - Yes (avoid it) * @lpm: Enable LPM support. * 0 - No * 1 - Yes @@ -480,6 +483,7 @@ struct dwc2_core_params { #define DWC2_POWER_DOWN_PARAM_NONE 0 #define DWC2_POWER_DOWN_PARAM_PARTIAL 1 #define DWC2_POWER_DOWN_PARAM_HIBERNATION 2 + bool no_clock_gating; bool lpm; bool lpm_clock_gating; diff --git a/drivers/usb/dwc2/core_intr.c b/drivers/usb/dwc2/core_intr.c index a5ab03808da69f..a5c52b237e7237 100644 --- a/drivers/usb/dwc2/core_intr.c +++ b/drivers/usb/dwc2/core_intr.c @@ -556,7 +556,8 @@ static void dwc2_handle_usb_suspend_intr(struct dwc2_hsotg *hsotg) * If neither hibernation nor partial power down are supported, * clock gating is used to save power. */ - dwc2_gadget_enter_clock_gating(hsotg); + if (!hsotg->params.no_clock_gating) + dwc2_gadget_enter_clock_gating(hsotg); } /* diff --git a/drivers/usb/dwc2/hcd.c b/drivers/usb/dwc2/hcd.c index 035d4911a3c324..2a7828971d0564 100644 --- a/drivers/usb/dwc2/hcd.c +++ b/drivers/usb/dwc2/hcd.c @@ -3338,7 +3338,8 @@ int dwc2_port_suspend(struct dwc2_hsotg *hsotg, u16 windex) * If not hibernation nor partial power down are supported, * clock gating is used to save power. */ - dwc2_host_enter_clock_gating(hsotg); + if (!hsotg->params.no_clock_gating) + dwc2_host_enter_clock_gating(hsotg); break; } @@ -4402,7 +4403,8 @@ static int _dwc2_hcd_suspend(struct usb_hcd *hcd) * If not hibernation nor partial power down are supported, * clock gating is used to save power. */ - dwc2_host_enter_clock_gating(hsotg); + if (!hsotg->params.no_clock_gating) + dwc2_host_enter_clock_gating(hsotg); /* After entering suspend, hardware is not accessible */ clear_bit(HCD_FLAG_HW_ACCESSIBLE, &hcd->flags); diff --git a/drivers/usb/dwc2/params.c b/drivers/usb/dwc2/params.c index 67c5eb1402325a..59e11934599475 100644 --- a/drivers/usb/dwc2/params.c +++ b/drivers/usb/dwc2/params.c @@ -76,6 +76,7 @@ static void dwc2_set_s3c6400_params(struct dwc2_hsotg *hsotg) struct dwc2_core_params *p = &hsotg->params; p->power_down = DWC2_POWER_DOWN_PARAM_NONE; + p->no_clock_gating = true; p->phy_utmi_width = 8; } From d53dc38857f6dbefabd9eecfcbf67b6eac9a1ef4 Mon Sep 17 00:00:00 2001 From: Minas Harutyunyan Date: Tue, 20 Jul 2021 05:41:24 -0700 Subject: [PATCH 604/714] usb: dwc2: gadget: Fix sending zero length packet in DDMA mode. Sending zero length packet in DDMA mode perform by DMA descriptor by setting SP (short packet) flag. For DDMA in function dwc2_hsotg_complete_in() does not need to send zlp. Tested by USBCV MSC tests. Fixes: f71b5e2533de ("usb: dwc2: gadget: fix zero length packet transfers") Cc: stable Signed-off-by: Minas Harutyunyan Link: https://lore.kernel.org/r/967bad78c55dd2db1c19714eee3d0a17cf99d74a.1626777738.git.Minas.Harutyunyan@synopsys.com Signed-off-by: Greg Kroah-Hartman --- drivers/usb/dwc2/gadget.c | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) diff --git a/drivers/usb/dwc2/gadget.c b/drivers/usb/dwc2/gadget.c index 74d25019272f11..3146df6e6510d2 100644 --- a/drivers/usb/dwc2/gadget.c +++ b/drivers/usb/dwc2/gadget.c @@ -2749,12 +2749,14 @@ static void dwc2_hsotg_complete_in(struct dwc2_hsotg *hsotg, return; } - /* Zlp for all endpoints, for ep0 only in DATA IN stage */ + /* Zlp for all endpoints in non DDMA, for ep0 only in DATA IN stage */ if (hs_ep->send_zlp) { - dwc2_hsotg_program_zlp(hsotg, hs_ep); hs_ep->send_zlp = 0; - /* transfer will be completed on next complete interrupt */ - return; + if (!using_desc_dma(hsotg)) { + dwc2_hsotg_program_zlp(hsotg, hs_ep); + /* transfer will be completed on next complete interrupt */ + return; + } } if (hs_ep->index == 0 && hsotg->ep0_state == DWC2_EP0_DATA_IN) { From 1f958f3dff42a7239bbfdd9a33db5f11574d16bf Mon Sep 17 00:00:00 2001 From: Greg Kroah-Hartman Date: Wed, 21 Jul 2021 09:55:38 +0200 Subject: [PATCH 605/714] Revert "arm64: dts: qcom: Harmonize DWC USB3 DT nodes name" This reverts commit eb9b7bfd5954f5f6ac4d57313541dd0294660aad as it breaks working userspace implementations (i.e. Android systems) The device node name here is part of configfs, so it is a user-visable api that can not be changed. Reported-by: John Stultz Cc: Serge Semin Cc: Krzysztof Kozlowski Cc: Bjorn Andersson Link: https://lore.kernel.org/r/CALAqxLX_FNvFndEDWtGbFPjSzuAbfqxQE07diBJFZtftwEJX5A@mail.gmail.com Signed-off-by: Greg Kroah-Hartman --- arch/arm64/boot/dts/qcom/apq8096-db820c.dtsi | 4 ++-- arch/arm64/boot/dts/qcom/ipq8074.dtsi | 4 ++-- arch/arm64/boot/dts/qcom/msm8996.dtsi | 4 ++-- arch/arm64/boot/dts/qcom/msm8998.dtsi | 2 +- arch/arm64/boot/dts/qcom/qcs404-evb.dtsi | 2 +- arch/arm64/boot/dts/qcom/qcs404.dtsi | 4 ++-- arch/arm64/boot/dts/qcom/sc7180.dtsi | 2 +- arch/arm64/boot/dts/qcom/sdm845.dtsi | 4 ++-- arch/arm64/boot/dts/qcom/sm8150.dtsi | 2 +- 9 files changed, 14 insertions(+), 14 deletions(-) diff --git a/arch/arm64/boot/dts/qcom/apq8096-db820c.dtsi b/arch/arm64/boot/dts/qcom/apq8096-db820c.dtsi index 068692350e000b..51e17094d7b18e 100644 --- a/arch/arm64/boot/dts/qcom/apq8096-db820c.dtsi +++ b/arch/arm64/boot/dts/qcom/apq8096-db820c.dtsi @@ -1063,7 +1063,7 @@ status = "okay"; extcon = <&usb2_id>; - usb@7600000 { + dwc3@7600000 { extcon = <&usb2_id>; dr_mode = "otg"; maximum-speed = "high-speed"; @@ -1074,7 +1074,7 @@ status = "okay"; extcon = <&usb3_id>; - usb@6a00000 { + dwc3@6a00000 { extcon = <&usb3_id>; dr_mode = "otg"; }; diff --git a/arch/arm64/boot/dts/qcom/ipq8074.dtsi b/arch/arm64/boot/dts/qcom/ipq8074.dtsi index 95d6cb8cd4c0c1..f39bc10cc5bd73 100644 --- a/arch/arm64/boot/dts/qcom/ipq8074.dtsi +++ b/arch/arm64/boot/dts/qcom/ipq8074.dtsi @@ -443,7 +443,7 @@ resets = <&gcc GCC_USB0_BCR>; status = "disabled"; - dwc_0: usb@8a00000 { + dwc_0: dwc3@8a00000 { compatible = "snps,dwc3"; reg = <0x8a00000 0xcd00>; interrupts = ; @@ -484,7 +484,7 @@ resets = <&gcc GCC_USB1_BCR>; status = "disabled"; - dwc_1: usb@8c00000 { + dwc_1: dwc3@8c00000 { compatible = "snps,dwc3"; reg = <0x8c00000 0xcd00>; interrupts = ; diff --git a/arch/arm64/boot/dts/qcom/msm8996.dtsi b/arch/arm64/boot/dts/qcom/msm8996.dtsi index 0e1bc4669d7e10..78c55ca10ba9b1 100644 --- a/arch/arm64/boot/dts/qcom/msm8996.dtsi +++ b/arch/arm64/boot/dts/qcom/msm8996.dtsi @@ -2566,7 +2566,7 @@ power-domains = <&gcc USB30_GDSC>; status = "disabled"; - usb@6a00000 { + dwc3@6a00000 { compatible = "snps,dwc3"; reg = <0x06a00000 0xcc00>; interrupts = <0 131 IRQ_TYPE_LEVEL_HIGH>; @@ -2873,7 +2873,7 @@ qcom,select-utmi-as-pipe-clk; status = "disabled"; - usb@7600000 { + dwc3@7600000 { compatible = "snps,dwc3"; reg = <0x07600000 0xcc00>; interrupts = <0 138 IRQ_TYPE_LEVEL_HIGH>; diff --git a/arch/arm64/boot/dts/qcom/msm8998.dtsi b/arch/arm64/boot/dts/qcom/msm8998.dtsi index 6f294f9c0cdf15..e9d3ce29937c4e 100644 --- a/arch/arm64/boot/dts/qcom/msm8998.dtsi +++ b/arch/arm64/boot/dts/qcom/msm8998.dtsi @@ -1964,7 +1964,7 @@ resets = <&gcc GCC_USB_30_BCR>; - usb3_dwc3: usb@a800000 { + usb3_dwc3: dwc3@a800000 { compatible = "snps,dwc3"; reg = <0x0a800000 0xcd00>; interrupts = ; diff --git a/arch/arm64/boot/dts/qcom/qcs404-evb.dtsi b/arch/arm64/boot/dts/qcom/qcs404-evb.dtsi index f8a55307b8554a..a80c578484ba3e 100644 --- a/arch/arm64/boot/dts/qcom/qcs404-evb.dtsi +++ b/arch/arm64/boot/dts/qcom/qcs404-evb.dtsi @@ -337,7 +337,7 @@ &usb3 { status = "okay"; - usb@7580000 { + dwc3@7580000 { dr_mode = "host"; }; }; diff --git a/arch/arm64/boot/dts/qcom/qcs404.dtsi b/arch/arm64/boot/dts/qcom/qcs404.dtsi index 9c4be020d5689a..339790ba585de9 100644 --- a/arch/arm64/boot/dts/qcom/qcs404.dtsi +++ b/arch/arm64/boot/dts/qcom/qcs404.dtsi @@ -544,7 +544,7 @@ assigned-clock-rates = <19200000>, <200000000>; status = "disabled"; - usb@7580000 { + dwc3@7580000 { compatible = "snps,dwc3"; reg = <0x07580000 0xcd00>; interrupts = ; @@ -573,7 +573,7 @@ assigned-clock-rates = <19200000>, <133333333>; status = "disabled"; - usb@78c0000 { + dwc3@78c0000 { compatible = "snps,dwc3"; reg = <0x078c0000 0xcc00>; interrupts = ; diff --git a/arch/arm64/boot/dts/qcom/sc7180.dtsi b/arch/arm64/boot/dts/qcom/sc7180.dtsi index a5d58eb928960b..a9a052f8c63c86 100644 --- a/arch/arm64/boot/dts/qcom/sc7180.dtsi +++ b/arch/arm64/boot/dts/qcom/sc7180.dtsi @@ -2756,7 +2756,7 @@ <&gem_noc MASTER_APPSS_PROC 0 &config_noc SLAVE_USB3 0>; interconnect-names = "usb-ddr", "apps-usb"; - usb_1_dwc3: usb@a600000 { + usb_1_dwc3: dwc3@a600000 { compatible = "snps,dwc3"; reg = <0 0x0a600000 0 0xe000>; interrupts = ; diff --git a/arch/arm64/boot/dts/qcom/sdm845.dtsi b/arch/arm64/boot/dts/qcom/sdm845.dtsi index 1796ae8372be24..0a86fe71a66d19 100644 --- a/arch/arm64/boot/dts/qcom/sdm845.dtsi +++ b/arch/arm64/boot/dts/qcom/sdm845.dtsi @@ -3781,7 +3781,7 @@ <&gladiator_noc MASTER_APPSS_PROC 0 &config_noc SLAVE_USB3_0 0>; interconnect-names = "usb-ddr", "apps-usb"; - usb_1_dwc3: usb@a600000 { + usb_1_dwc3: dwc3@a600000 { compatible = "snps,dwc3"; reg = <0 0x0a600000 0 0xcd00>; interrupts = ; @@ -3829,7 +3829,7 @@ <&gladiator_noc MASTER_APPSS_PROC 0 &config_noc SLAVE_USB3_1 0>; interconnect-names = "usb-ddr", "apps-usb"; - usb_2_dwc3: usb@a800000 { + usb_2_dwc3: dwc3@a800000 { compatible = "snps,dwc3"; reg = <0 0x0a800000 0 0xcd00>; interrupts = ; diff --git a/arch/arm64/boot/dts/qcom/sm8150.dtsi b/arch/arm64/boot/dts/qcom/sm8150.dtsi index 612dda0fef4385..eef9d79157e981 100644 --- a/arch/arm64/boot/dts/qcom/sm8150.dtsi +++ b/arch/arm64/boot/dts/qcom/sm8150.dtsi @@ -2344,7 +2344,7 @@ resets = <&gcc GCC_USB30_PRIM_BCR>; - usb_1_dwc3: usb@a600000 { + usb_1_dwc3: dwc3@a600000 { compatible = "snps,dwc3"; reg = <0 0x0a600000 0 0xcd00>; interrupts = ; From 7764656b108cd308c39e9a8554353b8f9ca232a3 Mon Sep 17 00:00:00 2001 From: Zhihao Cheng Date: Mon, 5 Jul 2021 21:38:29 +0800 Subject: [PATCH 606/714] nvme-pci: don't WARN_ON in nvme_reset_work if ctrl.state is not RESETTING Followling process: nvme_probe nvme_reset_ctrl nvme_change_ctrl_state(ctrl, NVME_CTRL_RESETTING) queue_work(nvme_reset_wq, &ctrl->reset_work) --------------> nvme_remove nvme_change_ctrl_state(&dev->ctrl, NVME_CTRL_DELETING) worker_thread process_one_work nvme_reset_work WARN_ON(dev->ctrl.state != NVME_CTRL_RESETTING) , which will trigger WARN_ON in nvme_reset_work(): [ 127.534298] WARNING: CPU: 0 PID: 139 at drivers/nvme/host/pci.c:2594 [ 127.536161] CPU: 0 PID: 139 Comm: kworker/u8:7 Not tainted 5.13.0 [ 127.552518] Call Trace: [ 127.552840] ? kvm_sched_clock_read+0x25/0x40 [ 127.553936] ? native_send_call_func_single_ipi+0x1c/0x30 [ 127.555117] ? send_call_function_single_ipi+0x9b/0x130 [ 127.556263] ? __smp_call_single_queue+0x48/0x60 [ 127.557278] ? ttwu_queue_wakelist+0xfa/0x1c0 [ 127.558231] ? try_to_wake_up+0x265/0x9d0 [ 127.559120] ? ext4_end_io_rsv_work+0x160/0x290 [ 127.560118] process_one_work+0x28c/0x640 [ 127.561002] worker_thread+0x39a/0x700 [ 127.561833] ? rescuer_thread+0x580/0x580 [ 127.562714] kthread+0x18c/0x1e0 [ 127.563444] ? set_kthread_struct+0x70/0x70 [ 127.564347] ret_from_fork+0x1f/0x30 The preceding problem can be easily reproduced by executing following script (based on blktests suite): test() { pdev="$(_get_pci_dev_from_blkdev)" sysfs="/sys/bus/pci/devices/${pdev}" for ((i = 0; i < 10; i++)); do echo 1 > "$sysfs/remove" echo 1 > /sys/bus/pci/rescan done } Since the device ctrl could be updated as an non-RESETTING state by repeating probe/remove in userspace (which is a normal situation), we can replace stack dumping WARN_ON with a warnning message. Fixes: 82b057caefaff ("nvme-pci: fix multiple ctrl removal schedulin") Signed-off-by: Zhihao Cheng --- drivers/nvme/host/pci.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/drivers/nvme/host/pci.c b/drivers/nvme/host/pci.c index 320051f5a3ddca..51852085239ef9 100644 --- a/drivers/nvme/host/pci.c +++ b/drivers/nvme/host/pci.c @@ -2631,7 +2631,9 @@ static void nvme_reset_work(struct work_struct *work) bool was_suspend = !!(dev->ctrl.ctrl_config & NVME_CC_SHN_NORMAL); int result; - if (WARN_ON(dev->ctrl.state != NVME_CTRL_RESETTING)) { + if (dev->ctrl.state != NVME_CTRL_RESETTING) { + dev_warn(dev->ctrl.device, "ctrl state %d is not RESETTING\n", + dev->ctrl.state); result = -ENODEV; goto out; } From 5396fdac56d87d04e75e5068c0c92d33625f51e7 Mon Sep 17 00:00:00 2001 From: Hannes Reinecke Date: Fri, 16 Jul 2021 13:30:35 +0200 Subject: [PATCH 607/714] nvme: fix refcounting imbalance when all paths are down When the last path to a ns_head drops the current code removes the ns_head from the subsystem list, but will only delete the disk itself if the last reference to the ns_head drops. This is causing an refcounting imbalance eg when applications have a reference to the disk, as then they'll never get notified that the disk is in fact dead. This patch moves the call 'del_gendisk' into nvme_mpath_check_last_path(), ensuring that the disk can be properly removed and applications get the appropriate notifications. Signed-off-by: Hannes Reinecke Reviewed-by: Keith Busch Signed-off-by: Christoph Hellwig --- drivers/nvme/host/core.c | 14 +++++++++++--- drivers/nvme/host/multipath.c | 9 ++++++++- drivers/nvme/host/nvme.h | 11 ++--------- 3 files changed, 21 insertions(+), 13 deletions(-) diff --git a/drivers/nvme/host/core.c b/drivers/nvme/host/core.c index 11779be4218686..17c05a4595f0c4 100644 --- a/drivers/nvme/host/core.c +++ b/drivers/nvme/host/core.c @@ -3807,6 +3807,8 @@ static void nvme_alloc_ns(struct nvme_ctrl *ctrl, unsigned nsid, static void nvme_ns_remove(struct nvme_ns *ns) { + bool last_path = false; + if (test_and_set_bit(NVME_NS_REMOVING, &ns->flags)) return; @@ -3815,8 +3817,6 @@ static void nvme_ns_remove(struct nvme_ns *ns) mutex_lock(&ns->ctrl->subsys->lock); list_del_rcu(&ns->siblings); - if (list_empty(&ns->head->list)) - list_del_init(&ns->head->entry); mutex_unlock(&ns->ctrl->subsys->lock); synchronize_rcu(); /* guarantee not available in head->list */ @@ -3836,7 +3836,15 @@ static void nvme_ns_remove(struct nvme_ns *ns) list_del_init(&ns->list); up_write(&ns->ctrl->namespaces_rwsem); - nvme_mpath_check_last_path(ns); + /* Synchronize with nvme_init_ns_head() */ + mutex_lock(&ns->head->subsys->lock); + if (list_empty(&ns->head->list)) { + list_del_init(&ns->head->entry); + last_path = true; + } + mutex_unlock(&ns->head->subsys->lock); + if (last_path) + nvme_mpath_shutdown_disk(ns->head); nvme_put_ns(ns); } diff --git a/drivers/nvme/host/multipath.c b/drivers/nvme/host/multipath.c index 0ea5298469c349..3f32c5e86bfcb2 100644 --- a/drivers/nvme/host/multipath.c +++ b/drivers/nvme/host/multipath.c @@ -760,14 +760,21 @@ void nvme_mpath_add_disk(struct nvme_ns *ns, struct nvme_id_ns *id) #endif } -void nvme_mpath_remove_disk(struct nvme_ns_head *head) +void nvme_mpath_shutdown_disk(struct nvme_ns_head *head) { if (!head->disk) return; + kblockd_schedule_work(&head->requeue_work); if (head->disk->flags & GENHD_FL_UP) { nvme_cdev_del(&head->cdev, &head->cdev_device); del_gendisk(head->disk); } +} + +void nvme_mpath_remove_disk(struct nvme_ns_head *head) +{ + if (!head->disk) + return; blk_set_queue_dying(head->disk->queue); /* make sure all pending bios are cleaned up */ kblockd_schedule_work(&head->requeue_work); diff --git a/drivers/nvme/host/nvme.h b/drivers/nvme/host/nvme.h index 18ef8dd03a90e2..5cd1fa3b8464db 100644 --- a/drivers/nvme/host/nvme.h +++ b/drivers/nvme/host/nvme.h @@ -716,14 +716,7 @@ void nvme_mpath_uninit(struct nvme_ctrl *ctrl); void nvme_mpath_stop(struct nvme_ctrl *ctrl); bool nvme_mpath_clear_current_path(struct nvme_ns *ns); void nvme_mpath_clear_ctrl_paths(struct nvme_ctrl *ctrl); - -static inline void nvme_mpath_check_last_path(struct nvme_ns *ns) -{ - struct nvme_ns_head *head = ns->head; - - if (head->disk && list_empty(&head->list)) - kblockd_schedule_work(&head->requeue_work); -} +void nvme_mpath_shutdown_disk(struct nvme_ns_head *head); static inline void nvme_trace_bio_complete(struct request *req) { @@ -772,7 +765,7 @@ static inline bool nvme_mpath_clear_current_path(struct nvme_ns *ns) static inline void nvme_mpath_clear_ctrl_paths(struct nvme_ctrl *ctrl) { } -static inline void nvme_mpath_check_last_path(struct nvme_ns *ns) +static inline void nvme_mpath_shutdown_disk(struct nvme_ns_head *head) { } static inline void nvme_trace_bio_complete(struct request *req) From 234211b8dd161fa25f192c78d5a8d2dd6bf920a0 Mon Sep 17 00:00:00 2001 From: Keith Busch Date: Mon, 19 Jul 2021 09:44:39 -0700 Subject: [PATCH 608/714] nvme: fix nvme_setup_command metadata trace event The metadata address is set after the trace event, so the trace is not capturing anything useful. Rather than logging the memory address, it's useful to know if the command carries a metadata payload, so change the trace event to log that true/false state instead. Signed-off-by: Keith Busch Signed-off-by: Christoph Hellwig --- drivers/nvme/host/trace.h | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/drivers/nvme/host/trace.h b/drivers/nvme/host/trace.h index daaf700eae7996..35bac7a2542278 100644 --- a/drivers/nvme/host/trace.h +++ b/drivers/nvme/host/trace.h @@ -56,7 +56,7 @@ TRACE_EVENT(nvme_setup_cmd, __field(u8, fctype) __field(u16, cid) __field(u32, nsid) - __field(u64, metadata) + __field(bool, metadata) __array(u8, cdw10, 24) ), TP_fast_assign( @@ -66,13 +66,13 @@ TRACE_EVENT(nvme_setup_cmd, __entry->flags = cmd->common.flags; __entry->cid = cmd->common.command_id; __entry->nsid = le32_to_cpu(cmd->common.nsid); - __entry->metadata = le64_to_cpu(cmd->common.metadata); + __entry->metadata = !!blk_integrity_rq(req); __entry->fctype = cmd->fabrics.fctype; __assign_disk_name(__entry->disk, req->rq_disk); memcpy(__entry->cdw10, &cmd->common.cdw10, sizeof(__entry->cdw10)); ), - TP_printk("nvme%d: %sqid=%d, cmdid=%u, nsid=%u, flags=0x%x, meta=0x%llx, cmd=(%s %s)", + TP_printk("nvme%d: %sqid=%d, cmdid=%u, nsid=%u, flags=0x%x, meta=0x%x, cmd=(%s %s)", __entry->ctrl_id, __print_disk_name(__entry->disk), __entry->qid, __entry->cid, __entry->nsid, __entry->flags, __entry->metadata, From 5c912e679506ef72adb95616d2f56a8a1b079a3d Mon Sep 17 00:00:00 2001 From: Wei Yongjun Date: Fri, 21 May 2021 02:10:10 +0000 Subject: [PATCH 609/714] usb: cdc-wdm: fix build error when CONFIG_WWAN_CORE is not set Gcc report build error as following when CONFIG_WWAN_CORE is not set: x86_64-linux-gnu-ld: drivers/usb/class/cdc-wdm.o: in function `wdm_disconnect': cdc-wdm.c:(.text+0xb2a): undefined reference to `wwan_remove_port' x86_64-linux-gnu-ld: drivers/usb/class/cdc-wdm.o: in function `wdm_in_callback': cdc-wdm.c:(.text+0xf23): undefined reference to `wwan_port_rx' x86_64-linux-gnu-ld: drivers/usb/class/cdc-wdm.o: in function `wdm_wwan_port_stop': cdc-wdm.c:(.text+0x127d): undefined reference to `wwan_port_get_drvdata' x86_64-linux-gnu-ld: drivers/usb/class/cdc-wdm.o: in function `wdm_wwan_port_tx': cdc-wdm.c:(.text+0x12d9): undefined reference to `wwan_port_get_drvdata' x86_64-linux-gnu-ld: cdc-wdm.c:(.text+0x13c1): undefined reference to `wwan_port_txoff' x86_64-linux-gnu-ld: drivers/usb/class/cdc-wdm.o: in function `wdm_wwan_port_start': cdc-wdm.c:(.text+0x13e0): undefined reference to `wwan_port_get_drvdata' x86_64-linux-gnu-ld: cdc-wdm.c:(.text+0x1431): undefined reference to `wwan_port_txon' x86_64-linux-gnu-ld: drivers/usb/class/cdc-wdm.o: in function `wdm_wwan_port_tx_complete': cdc-wdm.c:(.text+0x14a4): undefined reference to `wwan_port_txon' x86_64-linux-gnu-ld: drivers/usb/class/cdc-wdm.o: in function `wdm_create.cold': cdc-wdm.c:(.text.unlikely+0x209): undefined reference to `wwan_create_port' Using CONFIG_WWAN_CORE instead of CONFIG_WWAN to avoid build error. Fixes: cac6fb015f71 ("usb: class: cdc-wdm: WWAN framework integration") Reported-by: Hulk Robot Reviewed-by: Loic Poulain Signed-off-by: Wei Yongjun Link: https://lore.kernel.org/r/20210521021010.2490930-1-weiyongjun1@huawei.com Signed-off-by: Greg Kroah-Hartman --- drivers/usb/class/cdc-wdm.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/drivers/usb/class/cdc-wdm.c b/drivers/usb/class/cdc-wdm.c index fdf79bcf7eb09e..35d5908b5478aa 100644 --- a/drivers/usb/class/cdc-wdm.c +++ b/drivers/usb/class/cdc-wdm.c @@ -824,7 +824,7 @@ static struct usb_class_driver wdm_class = { }; /* --- WWAN framework integration --- */ -#ifdef CONFIG_WWAN +#ifdef CONFIG_WWAN_CORE static int wdm_wwan_port_start(struct wwan_port *port) { struct wdm_device *desc = wwan_port_get_drvdata(port); @@ -963,11 +963,11 @@ static void wdm_wwan_rx(struct wdm_device *desc, int length) /* inbuf has been copied, it is safe to check for outstanding data */ schedule_work(&desc->service_outs_intr); } -#else /* CONFIG_WWAN */ +#else /* CONFIG_WWAN_CORE */ static void wdm_wwan_init(struct wdm_device *desc) {} static void wdm_wwan_deinit(struct wdm_device *desc) {} static void wdm_wwan_rx(struct wdm_device *desc, int length) {} -#endif /* CONFIG_WWAN */ +#endif /* CONFIG_WWAN_CORE */ /* --- error handling --- */ static void wdm_rxwork(struct work_struct *work) From f3a1a937f7b240be623d989c8553a6d01465d04f Mon Sep 17 00:00:00 2001 From: Vincent Palatin Date: Wed, 21 Jul 2021 11:25:16 +0200 Subject: [PATCH 610/714] Revert "USB: quirks: ignore remote wake-up on Fibocom L850-GL LTE modem" This reverts commit 0bd860493f81eb2a46173f6f5e44cc38331c8dbd. While the patch was working as stated,ie preventing the L850-GL LTE modem from crashing on some U3 wake-ups due to a race condition between the host wake-up and the modem-side wake-up, when using the MBIM interface, this would force disabling the USB runtime PM on the device. The increased power consumption is significant for LTE laptops, and given that with decently recent modem firmwares, when the modem hits the bug, it automatically recovers (ie it drops from the bus, but automatically re-enumerates after less than half a second, rather than being stuck until a power cycle as it was doing with ancient firmware), for most people, the trade-off now seems in favor of re-enabling it by default. For people with access to the platform code, the bug can also be worked-around successfully by changing the USB3 LFPM polling off-time for the XHCI controller in the BIOS code. Signed-off-by: Vincent Palatin Link: https://lore.kernel.org/r/20210721092516.2775971-1-vpalatin@chromium.org Fixes: 0bd860493f81 ("USB: quirks: ignore remote wake-up on Fibocom L850-GL LTE modem") Signed-off-by: Greg Kroah-Hartman --- drivers/usb/core/quirks.c | 4 ---- 1 file changed, 4 deletions(-) diff --git a/drivers/usb/core/quirks.c b/drivers/usb/core/quirks.c index 6114cf83bb4479..8239fe7129dd7a 100644 --- a/drivers/usb/core/quirks.c +++ b/drivers/usb/core/quirks.c @@ -501,10 +501,6 @@ static const struct usb_device_id usb_quirk_list[] = { /* DJI CineSSD */ { USB_DEVICE(0x2ca3, 0x0031), .driver_info = USB_QUIRK_NO_LPM }, - /* Fibocom L850-GL LTE Modem */ - { USB_DEVICE(0x2cb7, 0x0007), .driver_info = - USB_QUIRK_IGNORE_REMOTE_WAKEUP }, - /* INTEL VALUE SSD */ { USB_DEVICE(0x8086, 0xf1a5), .driver_info = USB_QUIRK_RESET_RESUME }, From a7c3acca53801e10a77ede6b759a73f5ac4bc261 Mon Sep 17 00:00:00 2001 From: Jean-Philippe Brucker Date: Wed, 21 Jul 2021 09:18:35 +0200 Subject: [PATCH 611/714] arm64: smccc: Save lr before calling __arm_smccc_sve_check() Commit cfa7ff959a78 ("arm64: smccc: Support SMCCC v1.3 SVE register saving hint") added a call to __arm_smccc_sve_check() which clobbers the lr (register x30), causing __arm_smccc_hvc() to return to itself and crash. Save lr on the stack before calling __arm_smccc_sve_check(). Save the frame pointer (x29) to complete the frame record, and adjust the offsets used to access stack parameters. Acked-by: Ard Biesheuvel Acked-by: Mark Brown Fixes: cfa7ff959a78 ("arm64: smccc: Support SMCCC v1.3 SVE register saving hint") Signed-off-by: Jean-Philippe Brucker Link: https://lore.kernel.org/r/20210721071834.69130-1-jean-philippe@linaro.org Signed-off-by: Will Deacon --- arch/arm64/kernel/smccc-call.S | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/arch/arm64/kernel/smccc-call.S b/arch/arm64/kernel/smccc-call.S index d3d37f932b97a5..487381164ff6b6 100644 --- a/arch/arm64/kernel/smccc-call.S +++ b/arch/arm64/kernel/smccc-call.S @@ -32,20 +32,23 @@ SYM_FUNC_END(__arm_smccc_sve_check) EXPORT_SYMBOL(__arm_smccc_sve_check) .macro SMCCC instr + stp x29, x30, [sp, #-16]! + mov x29, sp alternative_if ARM64_SVE bl __arm_smccc_sve_check alternative_else_nop_endif \instr #0 - ldr x4, [sp] + ldr x4, [sp, #16] stp x0, x1, [x4, #ARM_SMCCC_RES_X0_OFFS] stp x2, x3, [x4, #ARM_SMCCC_RES_X2_OFFS] - ldr x4, [sp, #8] + ldr x4, [sp, #24] cbz x4, 1f /* no quirk structure */ ldr x9, [x4, #ARM_SMCCC_QUIRK_ID_OFFS] cmp x9, #ARM_SMCCC_QUIRK_QCOM_A6 b.ne 1f str x6, [x4, ARM_SMCCC_QUIRK_STATE_OFFS] -1: ret +1: ldp x29, x30, [sp], #16 + ret .endm /* From d8a719059b9dc963aa190598778ac804ff3e6a87 Mon Sep 17 00:00:00 2001 From: Jonathan Marek Date: Wed, 21 Jul 2021 17:02:13 +1000 Subject: [PATCH 612/714] Revert "mm/pgtable: add stubs for {pmd/pub}_{set/clear}_huge" This reverts commit c742199a014de23ee92055c2473d91fe5561ffdf. c742199a014d ("mm/pgtable: add stubs for {pmd/pub}_{set/clear}_huge") breaks arm64 in at least two ways for configurations where PUD or PMD folding occur: 1. We no longer install huge-vmap mappings and silently fall back to page-granular entries, despite being able to install block entries at what is effectively the PGD level. 2. If the linear map is backed with block mappings, these will now silently fail to be created in alloc_init_pud(), causing a panic early during boot. The pgtable selftests caught this, although a fix has not been forthcoming and Christophe is AWOL at the moment, so just revert the change for now to get a working -rc3 on which we can queue patches for 5.15. A simple revert breaks the build for 32-bit PowerPC 8xx machines, which rely on the default function definitions when the corresponding page-table levels are folded, since commit a6a8f7c4aa7e ("powerpc/8xx: add support for huge pages on VMAP and VMALLOC"), eg: powerpc64-linux-ld: mm/vmalloc.o: in function `vunmap_pud_range': linux/mm/vmalloc.c:362: undefined reference to `pud_clear_huge' To avoid that, add stubs for pud_clear_huge() and pmd_clear_huge() in arch/powerpc/mm/nohash/8xx.c as suggested by Christophe. Cc: Christophe Leroy Cc: Catalin Marinas Cc: Andrew Morton Cc: Nicholas Piggin Cc: Mike Rapoport Cc: Mark Rutland Cc: Geert Uytterhoeven Fixes: c742199a014d ("mm/pgtable: add stubs for {pmd/pub}_{set/clear}_huge") Signed-off-by: Jonathan Marek Reviewed-by: Ard Biesheuvel Acked-by: Marc Zyngier [mpe: Fold in 8xx.c changes from Christophe and mention in change log] Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/linux-arm-kernel/CAMuHMdXShORDox-xxaeUfDW3wx2PeggFSqhVSHVZNKCGK-y_vQ@mail.gmail.com/ Link: https://lore.kernel.org/r/20210717160118.9855-1-jonathan@marek.ca Link: https://lore.kernel.org/r/87r1fs1762.fsf@mpe.ellerman.id.au Signed-off-by: Will Deacon --- arch/arm64/mm/mmu.c | 20 ++++++++------------ arch/powerpc/mm/nohash/8xx.c | 10 ++++++++++ arch/x86/mm/pgtable.c | 34 +++++++++++++++------------------- include/linux/pgtable.h | 26 +------------------------- 4 files changed, 34 insertions(+), 56 deletions(-) diff --git a/arch/arm64/mm/mmu.c b/arch/arm64/mm/mmu.c index d7458650844888..9ff0de1b2b93c9 100644 --- a/arch/arm64/mm/mmu.c +++ b/arch/arm64/mm/mmu.c @@ -1339,7 +1339,6 @@ void *__init fixmap_remap_fdt(phys_addr_t dt_phys, int *size, pgprot_t prot) return dt_virt; } -#if CONFIG_PGTABLE_LEVELS > 3 int pud_set_huge(pud_t *pudp, phys_addr_t phys, pgprot_t prot) { pud_t new_pud = pfn_pud(__phys_to_pfn(phys), mk_pud_sect_prot(prot)); @@ -1354,16 +1353,6 @@ int pud_set_huge(pud_t *pudp, phys_addr_t phys, pgprot_t prot) return 1; } -int pud_clear_huge(pud_t *pudp) -{ - if (!pud_sect(READ_ONCE(*pudp))) - return 0; - pud_clear(pudp); - return 1; -} -#endif - -#if CONFIG_PGTABLE_LEVELS > 2 int pmd_set_huge(pmd_t *pmdp, phys_addr_t phys, pgprot_t prot) { pmd_t new_pmd = pfn_pmd(__phys_to_pfn(phys), mk_pmd_sect_prot(prot)); @@ -1378,6 +1367,14 @@ int pmd_set_huge(pmd_t *pmdp, phys_addr_t phys, pgprot_t prot) return 1; } +int pud_clear_huge(pud_t *pudp) +{ + if (!pud_sect(READ_ONCE(*pudp))) + return 0; + pud_clear(pudp); + return 1; +} + int pmd_clear_huge(pmd_t *pmdp) { if (!pmd_sect(READ_ONCE(*pmdp))) @@ -1385,7 +1382,6 @@ int pmd_clear_huge(pmd_t *pmdp) pmd_clear(pmdp); return 1; } -#endif int pmd_free_pte_page(pmd_t *pmdp, unsigned long addr) { diff --git a/arch/powerpc/mm/nohash/8xx.c b/arch/powerpc/mm/nohash/8xx.c index 60780e08911817..0df9fe29dd567f 100644 --- a/arch/powerpc/mm/nohash/8xx.c +++ b/arch/powerpc/mm/nohash/8xx.c @@ -240,3 +240,13 @@ void __init setup_kuap(bool disabled) mtspr(SPRN_MD_AP, MD_APG_KUAP); } #endif + +int pud_clear_huge(pud_t *pud) +{ + return 0; +} + +int pmd_clear_huge(pmd_t *pmd) +{ + return 0; +} diff --git a/arch/x86/mm/pgtable.c b/arch/x86/mm/pgtable.c index 3364fe62b90374..3481b35cb4ec7e 100644 --- a/arch/x86/mm/pgtable.c +++ b/arch/x86/mm/pgtable.c @@ -682,7 +682,6 @@ int p4d_clear_huge(p4d_t *p4d) } #endif -#if CONFIG_PGTABLE_LEVELS > 3 /** * pud_set_huge - setup kernel PUD mapping * @@ -721,23 +720,6 @@ int pud_set_huge(pud_t *pud, phys_addr_t addr, pgprot_t prot) return 1; } -/** - * pud_clear_huge - clear kernel PUD mapping when it is set - * - * Returns 1 on success and 0 on failure (no PUD map is found). - */ -int pud_clear_huge(pud_t *pud) -{ - if (pud_large(*pud)) { - pud_clear(pud); - return 1; - } - - return 0; -} -#endif - -#if CONFIG_PGTABLE_LEVELS > 2 /** * pmd_set_huge - setup kernel PMD mapping * @@ -768,6 +750,21 @@ int pmd_set_huge(pmd_t *pmd, phys_addr_t addr, pgprot_t prot) return 1; } +/** + * pud_clear_huge - clear kernel PUD mapping when it is set + * + * Returns 1 on success and 0 on failure (no PUD map is found). + */ +int pud_clear_huge(pud_t *pud) +{ + if (pud_large(*pud)) { + pud_clear(pud); + return 1; + } + + return 0; +} + /** * pmd_clear_huge - clear kernel PMD mapping when it is set * @@ -782,7 +779,6 @@ int pmd_clear_huge(pmd_t *pmd) return 0; } -#endif #ifdef CONFIG_X86_64 /** diff --git a/include/linux/pgtable.h b/include/linux/pgtable.h index d147480cdefc7f..e24d2c992b1129 100644 --- a/include/linux/pgtable.h +++ b/include/linux/pgtable.h @@ -1397,34 +1397,10 @@ static inline int p4d_clear_huge(p4d_t *p4d) } #endif /* !__PAGETABLE_P4D_FOLDED */ -#ifndef __PAGETABLE_PUD_FOLDED int pud_set_huge(pud_t *pud, phys_addr_t addr, pgprot_t prot); -int pud_clear_huge(pud_t *pud); -#else -static inline int pud_set_huge(pud_t *pud, phys_addr_t addr, pgprot_t prot) -{ - return 0; -} -static inline int pud_clear_huge(pud_t *pud) -{ - return 0; -} -#endif /* !__PAGETABLE_PUD_FOLDED */ - -#ifndef __PAGETABLE_PMD_FOLDED int pmd_set_huge(pmd_t *pmd, phys_addr_t addr, pgprot_t prot); +int pud_clear_huge(pud_t *pud); int pmd_clear_huge(pmd_t *pmd); -#else -static inline int pmd_set_huge(pmd_t *pmd, phys_addr_t addr, pgprot_t prot) -{ - return 0; -} -static inline int pmd_clear_huge(pmd_t *pmd) -{ - return 0; -} -#endif /* !__PAGETABLE_PMD_FOLDED */ - int p4d_free_pud_page(p4d_t *p4d, unsigned long addr); int pud_free_pmd_page(pud_t *pud, unsigned long addr); int pmd_free_pte_page(pmd_t *pmd, unsigned long addr); From 2dbd9c27eda5cf83aa990266a3355960d860da71 Mon Sep 17 00:00:00 2001 From: Pavel Skripkin Date: Thu, 8 Jul 2021 14:25:18 +0300 Subject: [PATCH 613/714] drm/ttm: add missing NULL checks MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit My local syzbot instance hit GPF in ttm_bo_release(). Unfortunately, syzbot didn't produce a reproducer for this, but I found out possible scenario: drm_gem_vram_create() <-- drm_gem_vram_object kzalloced (bo embedded in this object) ttm_bo_init() ttm_bo_init_reserved() ttm_resource_alloc() man->func->alloc() <-- allocation failure ttm_bo_put() ttm_bo_release() ttm_mem_io_free() <-- bo->resource == NULL passed as second argument *GPF* Added NULL check inside ttm_mem_io_free() to prevent reported GPF and make this function NULL save in future. Same problem was in ttm_bo_move_to_lru_tail() as Christian reported. ttm_bo_move_to_lru_tail() is called in ttm_bo_release() and mem pointer can be NULL as well as in ttm_mem_io_free(). Fail log: KASAN: null-ptr-deref in range [0x0000000000000020-0x0000000000000027] ... RIP: 0010:ttm_mem_io_free+0x28/0x170 drivers/gpu/drm/ttm/ttm_bo_util.c:66 .. Call Trace: ttm_bo_release+0xd94/0x10a0 drivers/gpu/drm/ttm/ttm_bo.c:422 kref_put include/linux/kref.h:65 [inline] ttm_bo_put drivers/gpu/drm/ttm/ttm_bo.c:470 [inline] ttm_bo_init_reserved+0x7cb/0x960 drivers/gpu/drm/ttm/ttm_bo.c:1050 ttm_bo_init+0x105/0x270 drivers/gpu/drm/ttm/ttm_bo.c:1074 drm_gem_vram_create+0x332/0x4c0 drivers/gpu/drm/drm_gem_vram_helper.c:228 Fixes: d3116756a710 ("drm/ttm: rename bo->mem and make it a pointer") Signed-off-by: Pavel Skripkin Reviewed-by: Christian König Signed-off-by: Christian König Link: https://patchwork.freedesktop.org/patch/msgid/20210708112518.17271-1-paskripkin@gmail.com --- drivers/gpu/drm/ttm/ttm_bo.c | 3 +++ drivers/gpu/drm/ttm/ttm_bo_util.c | 3 +++ 2 files changed, 6 insertions(+) diff --git a/drivers/gpu/drm/ttm/ttm_bo.c b/drivers/gpu/drm/ttm/ttm_bo.c index 1b950b45cf4b11..8d7fd65ccced30 100644 --- a/drivers/gpu/drm/ttm/ttm_bo.c +++ b/drivers/gpu/drm/ttm/ttm_bo.c @@ -102,6 +102,9 @@ void ttm_bo_move_to_lru_tail(struct ttm_buffer_object *bo, return; } + if (!mem) + return; + man = ttm_manager_type(bdev, mem->mem_type); list_move_tail(&bo->lru, &man->lru[bo->priority]); diff --git a/drivers/gpu/drm/ttm/ttm_bo_util.c b/drivers/gpu/drm/ttm/ttm_bo_util.c index 2f57f824e6dbdd..763fa6f4e07de8 100644 --- a/drivers/gpu/drm/ttm/ttm_bo_util.c +++ b/drivers/gpu/drm/ttm/ttm_bo_util.c @@ -63,6 +63,9 @@ int ttm_mem_io_reserve(struct ttm_device *bdev, void ttm_mem_io_free(struct ttm_device *bdev, struct ttm_resource *mem) { + if (!mem) + return; + if (!mem->bus.offset && !mem->bus.addr) return; From 56f6f4c4eb2a710ec8878dd9373d3d2b2eb75f5c Mon Sep 17 00:00:00 2001 From: Bhaumik Bhatt Date: Fri, 16 Jul 2021 13:21:04 +0530 Subject: [PATCH 614/714] bus: mhi: pci_generic: Apply no-op for wake using sideband wake boolean Devices such as SDX24 do not have the provision for inband wake doorbell in the form of channel 127 and instead have a sideband GPIO for it. Newer devices such as SDX55 or SDX65 support inband wake method by default. Ensure the functionality is used based on this such that device wake stays held when a client driver uses mhi_device_get() API or the equivalent debugfs entry. Link: https://lore.kernel.org/r/1624560809-30610-1-git-send-email-bbhatt@codeaurora.org Fixes: e3e5e6508fc1 ("bus: mhi: pci_generic: No-Op for device_wake operations") Cc: stable@vger.kernel.org #5.12 Reviewed-by: Manivannan Sadhasivam Signed-off-by: Bhaumik Bhatt Signed-off-by: Manivannan Sadhasivam Link: https://lore.kernel.org/r/20210716075106.49938-2-manivannan.sadhasivam@linaro.org Signed-off-by: Greg Kroah-Hartman --- drivers/bus/mhi/pci_generic.c | 27 +++++++++++++++++++-------- 1 file changed, 19 insertions(+), 8 deletions(-) diff --git a/drivers/bus/mhi/pci_generic.c b/drivers/bus/mhi/pci_generic.c index ca3bc40427f857..3396cb30ebeca6 100644 --- a/drivers/bus/mhi/pci_generic.c +++ b/drivers/bus/mhi/pci_generic.c @@ -32,6 +32,8 @@ * @edl: emergency download mode firmware path (if any) * @bar_num: PCI base address register to use for MHI MMIO register space * @dma_data_width: DMA transfer word size (32 or 64 bits) + * @sideband_wake: Devices using dedicated sideband GPIO for wakeup instead + * of inband wake support (such as sdx24) */ struct mhi_pci_dev_info { const struct mhi_controller_config *config; @@ -40,6 +42,7 @@ struct mhi_pci_dev_info { const char *edl; unsigned int bar_num; unsigned int dma_data_width; + bool sideband_wake; }; #define MHI_CHANNEL_CONFIG_UL(ch_num, ch_name, el_count, ev_ring) \ @@ -242,7 +245,8 @@ static const struct mhi_pci_dev_info mhi_qcom_sdx65_info = { .edl = "qcom/sdx65m/edl.mbn", .config = &modem_qcom_v1_mhiv_config, .bar_num = MHI_PCI_DEFAULT_BAR_NUM, - .dma_data_width = 32 + .dma_data_width = 32, + .sideband_wake = false, }; static const struct mhi_pci_dev_info mhi_qcom_sdx55_info = { @@ -251,7 +255,8 @@ static const struct mhi_pci_dev_info mhi_qcom_sdx55_info = { .edl = "qcom/sdx55m/edl.mbn", .config = &modem_qcom_v1_mhiv_config, .bar_num = MHI_PCI_DEFAULT_BAR_NUM, - .dma_data_width = 32 + .dma_data_width = 32, + .sideband_wake = false, }; static const struct mhi_pci_dev_info mhi_qcom_sdx24_info = { @@ -259,7 +264,8 @@ static const struct mhi_pci_dev_info mhi_qcom_sdx24_info = { .edl = "qcom/prog_firehose_sdx24.mbn", .config = &modem_qcom_v1_mhiv_config, .bar_num = MHI_PCI_DEFAULT_BAR_NUM, - .dma_data_width = 32 + .dma_data_width = 32, + .sideband_wake = true, }; static const struct mhi_channel_config mhi_quectel_em1xx_channels[] = { @@ -301,7 +307,8 @@ static const struct mhi_pci_dev_info mhi_quectel_em1xx_info = { .edl = "qcom/prog_firehose_sdx24.mbn", .config = &modem_quectel_em1xx_config, .bar_num = MHI_PCI_DEFAULT_BAR_NUM, - .dma_data_width = 32 + .dma_data_width = 32, + .sideband_wake = true, }; static const struct mhi_channel_config mhi_foxconn_sdx55_channels[] = { @@ -339,7 +346,8 @@ static const struct mhi_pci_dev_info mhi_foxconn_sdx55_info = { .edl = "qcom/sdx55m/edl.mbn", .config = &modem_foxconn_sdx55_config, .bar_num = MHI_PCI_DEFAULT_BAR_NUM, - .dma_data_width = 32 + .dma_data_width = 32, + .sideband_wake = false, }; static const struct pci_device_id mhi_pci_id_table[] = { @@ -640,9 +648,12 @@ static int mhi_pci_probe(struct pci_dev *pdev, const struct pci_device_id *id) mhi_cntrl->status_cb = mhi_pci_status_cb; mhi_cntrl->runtime_get = mhi_pci_runtime_get; mhi_cntrl->runtime_put = mhi_pci_runtime_put; - mhi_cntrl->wake_get = mhi_pci_wake_get_nop; - mhi_cntrl->wake_put = mhi_pci_wake_put_nop; - mhi_cntrl->wake_toggle = mhi_pci_wake_toggle_nop; + + if (info->sideband_wake) { + mhi_cntrl->wake_get = mhi_pci_wake_get_nop; + mhi_cntrl->wake_put = mhi_pci_wake_put_nop; + mhi_cntrl->wake_toggle = mhi_pci_wake_toggle_nop; + } err = mhi_pci_claim(mhi_cntrl, info->bar_num, DMA_BIT_MASK(info->dma_data_width)); if (err) From 546362a9ef2ef40b57c6605f14e88ced507f8dd0 Mon Sep 17 00:00:00 2001 From: Bhaumik Bhatt Date: Fri, 16 Jul 2021 13:21:05 +0530 Subject: [PATCH 615/714] bus: mhi: core: Validate channel ID when processing command completions MHI reads the channel ID from the event ring element sent by the device which can be any value between 0 and 255. In order to prevent any out of bound accesses, add a check against the maximum number of channels supported by the controller and those channels not configured yet so as to skip processing of that event ring element. Link: https://lore.kernel.org/r/1624558141-11045-1-git-send-email-bbhatt@codeaurora.org Fixes: 1d3173a3bae7 ("bus: mhi: core: Add support for processing events from client device") Cc: stable@vger.kernel.org #5.10 Reviewed-by: Hemant Kumar Reviewed-by: Manivannan Sadhasivam Reviewed-by: Jeffrey Hugo Signed-off-by: Bhaumik Bhatt Signed-off-by: Manivannan Sadhasivam Link: https://lore.kernel.org/r/20210716075106.49938-3-manivannan.sadhasivam@linaro.org Signed-off-by: Greg Kroah-Hartman --- drivers/bus/mhi/core/main.c | 17 ++++++++++++----- 1 file changed, 12 insertions(+), 5 deletions(-) diff --git a/drivers/bus/mhi/core/main.c b/drivers/bus/mhi/core/main.c index 22acde118bc352..fc9196f11cb7db 100644 --- a/drivers/bus/mhi/core/main.c +++ b/drivers/bus/mhi/core/main.c @@ -773,11 +773,18 @@ static void mhi_process_cmd_completion(struct mhi_controller *mhi_cntrl, cmd_pkt = mhi_to_virtual(mhi_ring, ptr); chan = MHI_TRE_GET_CMD_CHID(cmd_pkt); - mhi_chan = &mhi_cntrl->mhi_chan[chan]; - write_lock_bh(&mhi_chan->lock); - mhi_chan->ccs = MHI_TRE_GET_EV_CODE(tre); - complete(&mhi_chan->completion); - write_unlock_bh(&mhi_chan->lock); + + if (chan < mhi_cntrl->max_chan && + mhi_cntrl->mhi_chan[chan].configured) { + mhi_chan = &mhi_cntrl->mhi_chan[chan]; + write_lock_bh(&mhi_chan->lock); + mhi_chan->ccs = MHI_TRE_GET_EV_CODE(tre); + complete(&mhi_chan->completion); + write_unlock_bh(&mhi_chan->lock); + } else { + dev_err(&mhi_cntrl->mhi_dev->dev, + "Completion packet for invalid channel ID: %d\n", chan); + } mhi_del_ring_element(mhi_cntrl, mhi_ring); } From b8a97f2a65388394f433bf0730293a94f7d49046 Mon Sep 17 00:00:00 2001 From: Loic Poulain Date: Fri, 16 Jul 2021 13:21:06 +0530 Subject: [PATCH 616/714] bus: mhi: pci_generic: Fix inbound IPCR channel The qrtr-mhi client driver assumes that inbound buffers are automatically allocated and queued by the MHI core, but this doesn't happen for mhi pci devices since IPCR inbound channel is not flagged with auto_queue, causing unusable IPCR (qrtr) feature. Fix that. Link: https://lore.kernel.org/r/1625736749-24947-1-git-send-email-loic.poulain@linaro.org [mani: fixed a spelling mistake in commit description] Fixes: 855a70c12021 ("bus: mhi: Add MHI PCI support for WWAN modems") Cc: stable@vger.kernel.org #5.10 Reviewed-by: Hemant kumar Reviewed-by: Manivannan Sadhasivam Signed-off-by: Loic Poulain Signed-off-by: Manivannan Sadhasivam Link: https://lore.kernel.org/r/20210716075106.49938-4-manivannan.sadhasivam@linaro.org Signed-off-by: Greg Kroah-Hartman --- drivers/bus/mhi/pci_generic.c | 18 +++++++++++++++++- 1 file changed, 17 insertions(+), 1 deletion(-) diff --git a/drivers/bus/mhi/pci_generic.c b/drivers/bus/mhi/pci_generic.c index 3396cb30ebeca6..4dd1077354af0c 100644 --- a/drivers/bus/mhi/pci_generic.c +++ b/drivers/bus/mhi/pci_generic.c @@ -75,6 +75,22 @@ struct mhi_pci_dev_info { .doorbell_mode_switch = false, \ } +#define MHI_CHANNEL_CONFIG_DL_AUTOQUEUE(ch_num, ch_name, el_count, ev_ring) \ + { \ + .num = ch_num, \ + .name = ch_name, \ + .num_elements = el_count, \ + .event_ring = ev_ring, \ + .dir = DMA_FROM_DEVICE, \ + .ee_mask = BIT(MHI_EE_AMSS), \ + .pollcfg = 0, \ + .doorbell = MHI_DB_BRST_DISABLE, \ + .lpm_notify = false, \ + .offload_channel = false, \ + .doorbell_mode_switch = false, \ + .auto_queue = true, \ + } + #define MHI_EVENT_CONFIG_CTRL(ev_ring, el_count) \ { \ .num_elements = el_count, \ @@ -213,7 +229,7 @@ static const struct mhi_channel_config modem_qcom_v1_mhi_channels[] = { MHI_CHANNEL_CONFIG_UL(14, "QMI", 4, 0), MHI_CHANNEL_CONFIG_DL(15, "QMI", 4, 0), MHI_CHANNEL_CONFIG_UL(20, "IPCR", 8, 0), - MHI_CHANNEL_CONFIG_DL(21, "IPCR", 8, 0), + MHI_CHANNEL_CONFIG_DL_AUTOQUEUE(21, "IPCR", 8, 0), MHI_CHANNEL_CONFIG_UL_FP(34, "FIREHOSE", 32, 0), MHI_CHANNEL_CONFIG_DL_FP(35, "FIREHOSE", 32, 0), MHI_CHANNEL_CONFIG_HW_UL(100, "IP_HW0", 128, 2), From e703eaff5089da93fd379678f0371f52497042ba Mon Sep 17 00:00:00 2001 From: Jorgen Hansen Date: Wed, 21 Jul 2021 03:02:46 -0700 Subject: [PATCH 617/714] MAINTAINERS: Update for VMCI driver Add maintainer info for the VMware VMCI driver. v2: moved pv-drivers to L: as private list Acked-by: Vishnu Dasa Signed-off-by: Jorgen Hansen Link: https://lore.kernel.org/r/1626861766-11115-1-git-send-email-jhansen@vmware.com Signed-off-by: Greg Kroah-Hartman --- MAINTAINERS | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/MAINTAINERS b/MAINTAINERS index 6c8be735cc9105..61ff28cfdcdea1 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -19800,6 +19800,14 @@ L: netdev@vger.kernel.org S: Supported F: drivers/ptp/ptp_vmw.c +VMWARE VMCI DRIVER +M: Jorgen Hansen +M: Vishnu Dasa +L: linux-kernel@vger.kernel.org +L: pv-drivers@vmware.com (private) +S: Maintained +F: drivers/misc/vmw_vmci/ + VMWARE VMMOUSE SUBDRIVER M: "VMware Graphics" M: "VMware, Inc." From ebea6761b620d758ed77d2df70fab1ae7a363151 Mon Sep 17 00:00:00 2001 From: Shuo Liu Date: Wed, 14 Jul 2021 16:26:14 +0800 Subject: [PATCH 618/714] MAINTAINERS: Change ACRN HSM driver maintainer Shuo steps down, Fei will take over. Acked-by: Fei Li Signed-off-by: Shuo Liu Link: https://lore.kernel.org/r/20210714082614.88560-1-shuo.a.liu@intel.com Signed-off-by: Greg Kroah-Hartman --- MAINTAINERS | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/MAINTAINERS b/MAINTAINERS index 61ff28cfdcdea1..99316c4370a9a4 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -445,7 +445,7 @@ F: drivers/platform/x86/wmi.c F: include/uapi/linux/wmi.h ACRN HYPERVISOR SERVICE MODULE -M: Shuo Liu +M: Fei Li L: acrn-dev@lists.projectacrn.org (subscribers-only) S: Supported W: https://projectacrn.org From c453db6cd96418c79702eaf38259002755ab23ff Mon Sep 17 00:00:00 2001 From: Greg Kroah-Hartman Date: Tue, 29 Jun 2021 12:40:24 +0200 Subject: [PATCH 619/714] nds32: fix up stack guard gap Commit 1be7107fbe18 ("mm: larger stack guard gap, between vmas") fixed up all architectures to deal with the stack guard gap. But when nds32 was added to the tree, it forgot to do the same thing. Resolve this by properly fixing up the nsd32's version of arch_get_unmapped_area() Cc: Nick Hu Cc: Greentime Hu Cc: Vincent Chen Cc: Michal Hocko Cc: Hugh Dickins Cc: Qiang Liu Cc: stable Reported-by: iLifetruth Acked-by: Hugh Dickins Link: https://lore.kernel.org/r/20210629104024.2293615-1-gregkh@linuxfoundation.org Signed-off-by: Greg Kroah-Hartman --- arch/nds32/mm/mmap.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/nds32/mm/mmap.c b/arch/nds32/mm/mmap.c index c206b31ce07ac4..1bdf5e7d1b4384 100644 --- a/arch/nds32/mm/mmap.c +++ b/arch/nds32/mm/mmap.c @@ -59,7 +59,7 @@ arch_get_unmapped_area(struct file *filp, unsigned long addr, vma = find_vma(mm, addr); if (TASK_SIZE - len >= addr && - (!vma || addr + len <= vma->vm_start)) + (!vma || addr + len <= vm_start_gap(vma))) return addr; } From 6c881ca0b3040f3e724eae513117ba4ddef86057 Mon Sep 17 00:00:00 2001 From: David Howells Date: Tue, 15 Jun 2021 11:57:26 +0100 Subject: [PATCH 620/714] afs: Fix tracepoint string placement with built-in AFS To quote Alexey[1]: I was adding custom tracepoint to the kernel, grabbed full F34 kernel .config, disabled modules and booted whole shebang as VM kernel. Then did perf record -a -e ... It crashed: general protection fault, probably for non-canonical address 0x435f5346592e4243: 0000 [#1] SMP PTI CPU: 1 PID: 842 Comm: cat Not tainted 5.12.6+ #26 Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS 1.14.0-1.fc33 04/01/2014 RIP: 0010:t_show+0x22/0xd0 Then reproducer was narrowed to # cat /sys/kernel/tracing/printk_formats Original F34 kernel with modules didn't crash. So I started to disable options and after disabling AFS everything started working again. The root cause is that AFS was placing char arrays content into a section full of _pointers_ to strings with predictable consequences. Non canonical address 435f5346592e4243 is "CB.YFS_" which came from CM_NAME macro. Steps to reproduce: CONFIG_AFS=y CONFIG_TRACING=y # cat /sys/kernel/tracing/printk_formats Fix this by the following means: (1) Add enum->string translation tables in the event header with the AFS and YFS cache/callback manager operations listed by RPC operation ID. (2) Modify the afs_cb_call tracepoint to print the string from the translation table rather than using the string at the afs_call name pointer. (3) Switch translation table depending on the service we're being accessed as (AFS or YFS) in the tracepoint print clause. Will this cause problems to userspace utilities? Note that the symbolic representation of the YFS service ID isn't available to this header, so I've put it in as a number. I'm not sure if this is the best way to do this. (4) Remove the name wrangling (CM_NAME) macro and put the names directly into the afs_call_type structs in cmservice.c. Fixes: 8e8d7f13b6d5a9 ("afs: Add some tracepoints") Reported-by: Alexey Dobriyan (SK hynix) Signed-off-by: David Howells Reviewed-by: Steven Rostedt (VMware) Reviewed-by: Marc Dionne cc: Andrew Morton cc: linux-afs@lists.infradead.org Link: https://lore.kernel.org/r/YLAXfvZ+rObEOdc%2F@localhost.localdomain/ [1] Link: https://lore.kernel.org/r/643721.1623754699@warthog.procyon.org.uk/ Link: https://lore.kernel.org/r/162430903582.2896199.6098150063997983353.stgit@warthog.procyon.org.uk/ # v1 Link: https://lore.kernel.org/r/162609463957.3133237.15916579353149746363.stgit@warthog.procyon.org.uk/ # v1 (repost) Link: https://lore.kernel.org/r/162610726860.3408253.445207609466288531.stgit@warthog.procyon.org.uk/ # v2 --- fs/afs/cmservice.c | 25 ++++---------- include/trace/events/afs.h | 67 +++++++++++++++++++++++++++++++++++--- 2 files changed, 69 insertions(+), 23 deletions(-) diff --git a/fs/afs/cmservice.c b/fs/afs/cmservice.c index d3c6bb22c5f489..a3f5de28be798d 100644 --- a/fs/afs/cmservice.c +++ b/fs/afs/cmservice.c @@ -29,16 +29,11 @@ static void SRXAFSCB_TellMeAboutYourself(struct work_struct *); static int afs_deliver_yfs_cb_callback(struct afs_call *); -#define CM_NAME(name) \ - char afs_SRXCB##name##_name[] __tracepoint_string = \ - "CB." #name - /* * CB.CallBack operation type */ -static CM_NAME(CallBack); static const struct afs_call_type afs_SRXCBCallBack = { - .name = afs_SRXCBCallBack_name, + .name = "CB.CallBack", .deliver = afs_deliver_cb_callback, .destructor = afs_cm_destructor, .work = SRXAFSCB_CallBack, @@ -47,9 +42,8 @@ static const struct afs_call_type afs_SRXCBCallBack = { /* * CB.InitCallBackState operation type */ -static CM_NAME(InitCallBackState); static const struct afs_call_type afs_SRXCBInitCallBackState = { - .name = afs_SRXCBInitCallBackState_name, + .name = "CB.InitCallBackState", .deliver = afs_deliver_cb_init_call_back_state, .destructor = afs_cm_destructor, .work = SRXAFSCB_InitCallBackState, @@ -58,9 +52,8 @@ static const struct afs_call_type afs_SRXCBInitCallBackState = { /* * CB.InitCallBackState3 operation type */ -static CM_NAME(InitCallBackState3); static const struct afs_call_type afs_SRXCBInitCallBackState3 = { - .name = afs_SRXCBInitCallBackState3_name, + .name = "CB.InitCallBackState3", .deliver = afs_deliver_cb_init_call_back_state3, .destructor = afs_cm_destructor, .work = SRXAFSCB_InitCallBackState, @@ -69,9 +62,8 @@ static const struct afs_call_type afs_SRXCBInitCallBackState3 = { /* * CB.Probe operation type */ -static CM_NAME(Probe); static const struct afs_call_type afs_SRXCBProbe = { - .name = afs_SRXCBProbe_name, + .name = "CB.Probe", .deliver = afs_deliver_cb_probe, .destructor = afs_cm_destructor, .work = SRXAFSCB_Probe, @@ -80,9 +72,8 @@ static const struct afs_call_type afs_SRXCBProbe = { /* * CB.ProbeUuid operation type */ -static CM_NAME(ProbeUuid); static const struct afs_call_type afs_SRXCBProbeUuid = { - .name = afs_SRXCBProbeUuid_name, + .name = "CB.ProbeUuid", .deliver = afs_deliver_cb_probe_uuid, .destructor = afs_cm_destructor, .work = SRXAFSCB_ProbeUuid, @@ -91,9 +82,8 @@ static const struct afs_call_type afs_SRXCBProbeUuid = { /* * CB.TellMeAboutYourself operation type */ -static CM_NAME(TellMeAboutYourself); static const struct afs_call_type afs_SRXCBTellMeAboutYourself = { - .name = afs_SRXCBTellMeAboutYourself_name, + .name = "CB.TellMeAboutYourself", .deliver = afs_deliver_cb_tell_me_about_yourself, .destructor = afs_cm_destructor, .work = SRXAFSCB_TellMeAboutYourself, @@ -102,9 +92,8 @@ static const struct afs_call_type afs_SRXCBTellMeAboutYourself = { /* * YFS CB.CallBack operation type */ -static CM_NAME(YFS_CallBack); static const struct afs_call_type afs_SRXYFSCB_CallBack = { - .name = afs_SRXCBYFS_CallBack_name, + .name = "YFSCB.CallBack", .deliver = afs_deliver_yfs_cb_callback, .destructor = afs_cm_destructor, .work = SRXAFSCB_CallBack, diff --git a/include/trace/events/afs.h b/include/trace/events/afs.h index 3ccf591b237406..9f73ed2cf06116 100644 --- a/include/trace/events/afs.h +++ b/include/trace/events/afs.h @@ -174,6 +174,34 @@ enum afs_vl_operation { afs_VL_GetCapabilities = 65537, /* AFS Get VL server capabilities */ }; +enum afs_cm_operation { + afs_CB_CallBack = 204, /* AFS break callback promises */ + afs_CB_InitCallBackState = 205, /* AFS initialise callback state */ + afs_CB_Probe = 206, /* AFS probe client */ + afs_CB_GetLock = 207, /* AFS get contents of CM lock table */ + afs_CB_GetCE = 208, /* AFS get cache file description */ + afs_CB_GetXStatsVersion = 209, /* AFS get version of extended statistics */ + afs_CB_GetXStats = 210, /* AFS get contents of extended statistics data */ + afs_CB_InitCallBackState3 = 213, /* AFS initialise callback state, version 3 */ + afs_CB_ProbeUuid = 214, /* AFS check the client hasn't rebooted */ +}; + +enum yfs_cm_operation { + yfs_CB_Probe = 206, /* YFS probe client */ + yfs_CB_GetLock = 207, /* YFS get contents of CM lock table */ + yfs_CB_XStatsVersion = 209, /* YFS get version of extended statistics */ + yfs_CB_GetXStats = 210, /* YFS get contents of extended statistics data */ + yfs_CB_InitCallBackState3 = 213, /* YFS initialise callback state, version 3 */ + yfs_CB_ProbeUuid = 214, /* YFS check the client hasn't rebooted */ + yfs_CB_GetServerPrefs = 215, + yfs_CB_GetCellServDV = 216, + yfs_CB_GetLocalCell = 217, + yfs_CB_GetCacheConfig = 218, + yfs_CB_GetCellByNum = 65537, + yfs_CB_TellMeAboutYourself = 65538, /* get client capabilities */ + yfs_CB_CallBack = 64204, +}; + enum afs_edit_dir_op { afs_edit_dir_create, afs_edit_dir_create_error, @@ -436,6 +464,32 @@ enum afs_cb_break_reason { EM(afs_YFSVL_GetCellName, "YFSVL.GetCellName") \ E_(afs_VL_GetCapabilities, "VL.GetCapabilities") +#define afs_cm_operations \ + EM(afs_CB_CallBack, "CB.CallBack") \ + EM(afs_CB_InitCallBackState, "CB.InitCallBackState") \ + EM(afs_CB_Probe, "CB.Probe") \ + EM(afs_CB_GetLock, "CB.GetLock") \ + EM(afs_CB_GetCE, "CB.GetCE") \ + EM(afs_CB_GetXStatsVersion, "CB.GetXStatsVersion") \ + EM(afs_CB_GetXStats, "CB.GetXStats") \ + EM(afs_CB_InitCallBackState3, "CB.InitCallBackState3") \ + E_(afs_CB_ProbeUuid, "CB.ProbeUuid") + +#define yfs_cm_operations \ + EM(yfs_CB_Probe, "YFSCB.Probe") \ + EM(yfs_CB_GetLock, "YFSCB.GetLock") \ + EM(yfs_CB_XStatsVersion, "YFSCB.XStatsVersion") \ + EM(yfs_CB_GetXStats, "YFSCB.GetXStats") \ + EM(yfs_CB_InitCallBackState3, "YFSCB.InitCallBackState3") \ + EM(yfs_CB_ProbeUuid, "YFSCB.ProbeUuid") \ + EM(yfs_CB_GetServerPrefs, "YFSCB.GetServerPrefs") \ + EM(yfs_CB_GetCellServDV, "YFSCB.GetCellServDV") \ + EM(yfs_CB_GetLocalCell, "YFSCB.GetLocalCell") \ + EM(yfs_CB_GetCacheConfig, "YFSCB.GetCacheConfig") \ + EM(yfs_CB_GetCellByNum, "YFSCB.GetCellByNum") \ + EM(yfs_CB_TellMeAboutYourself, "YFSCB.TellMeAboutYourself") \ + E_(yfs_CB_CallBack, "YFSCB.CallBack") + #define afs_edit_dir_ops \ EM(afs_edit_dir_create, "create") \ EM(afs_edit_dir_create_error, "c_fail") \ @@ -569,6 +623,8 @@ afs_server_traces; afs_cell_traces; afs_fs_operations; afs_vl_operations; +afs_cm_operations; +yfs_cm_operations; afs_edit_dir_ops; afs_edit_dir_reasons; afs_eproto_causes; @@ -649,20 +705,21 @@ TRACE_EVENT(afs_cb_call, TP_STRUCT__entry( __field(unsigned int, call ) - __field(const char *, name ) __field(u32, op ) + __field(u16, service_id ) ), TP_fast_assign( __entry->call = call->debug_id; - __entry->name = call->type->name; __entry->op = call->operation_ID; + __entry->service_id = call->service_id; ), - TP_printk("c=%08x %s o=%u", + TP_printk("c=%08x %s", __entry->call, - __entry->name, - __entry->op) + __entry->service_id == 2501 ? + __print_symbolic(__entry->op, yfs_cm_operations) : + __print_symbolic(__entry->op, afs_cm_operations)) ); TRACE_EVENT(afs_call, From afe6949862f77bcc14fa16ad7938a04e84586d6a Mon Sep 17 00:00:00 2001 From: Tom Rix Date: Fri, 30 Apr 2021 08:50:31 -0700 Subject: [PATCH 621/714] afs: check function return Static analysis reports this problem write.c:773:29: warning: Assigned value is garbage or undefined mapping->writeback_index = next; ^ ~~~~ The call to afs_writepages_region() can return without setting next. So check the function return before using next. Changes: ver #2: - Need to fix the range_cyclic case also[1]. Fixes: e87b03f5830e ("afs: Prepare for use of THPs") Signed-off-by: Tom Rix Signed-off-by: David Howells Reviewed-by: Marc Dionne cc: linux-afs@lists.infradead.org Link: https://lore.kernel.org/r/20210430155031.3287870-1-trix@redhat.com Link: https://lore.kernel.org/r/CAB9dFdvHsLsw7CMnB+4cgciWDSqVjuij4mH3TaXnHQB8sz5rHw@mail.gmail.com/ [1] Link: https://lore.kernel.org/r/162609464716.3133237.10354897554363093252.stgit@warthog.procyon.org.uk/ # v1 Link: https://lore.kernel.org/r/162610727640.3408253.8687445613469681311.stgit@warthog.procyon.org.uk/ # v2 --- fs/afs/write.c | 16 +++++++++++----- 1 file changed, 11 insertions(+), 5 deletions(-) diff --git a/fs/afs/write.c b/fs/afs/write.c index 3104b62c208263..1ed62e0ccfe57c 100644 --- a/fs/afs/write.c +++ b/fs/afs/write.c @@ -771,13 +771,19 @@ int afs_writepages(struct address_space *mapping, if (wbc->range_cyclic) { start = mapping->writeback_index * PAGE_SIZE; ret = afs_writepages_region(mapping, wbc, start, LLONG_MAX, &next); - if (start > 0 && wbc->nr_to_write > 0 && ret == 0) - ret = afs_writepages_region(mapping, wbc, 0, start, - &next); - mapping->writeback_index = next / PAGE_SIZE; + if (ret == 0) { + mapping->writeback_index = next / PAGE_SIZE; + if (start > 0 && wbc->nr_to_write > 0) { + ret = afs_writepages_region(mapping, wbc, 0, + start, &next); + if (ret == 0) + mapping->writeback_index = + next / PAGE_SIZE; + } + } } else if (wbc->range_start == 0 && wbc->range_end == LLONG_MAX) { ret = afs_writepages_region(mapping, wbc, 0, LLONG_MAX, &next); - if (wbc->nr_to_write > 0) + if (wbc->nr_to_write > 0 && ret == 0) mapping->writeback_index = next; } else { ret = afs_writepages_region(mapping, wbc, From 5a972474cf685bf99ca430979657095bda3a15c8 Mon Sep 17 00:00:00 2001 From: David Howells Date: Mon, 12 Jul 2021 17:04:47 +0100 Subject: [PATCH 622/714] afs: Fix setting of writeback_index Fix afs_writepages() to always set mapping->writeback_index to a page index and not a byte position[1]. Fixes: 31143d5d515e ("AFS: implement basic file write support") Reported-by: Marc Dionne Signed-off-by: David Howells Reviewed-by: Marc Dionne cc: linux-afs@lists.infradead.org Link: https://lore.kernel.org/r/CAB9dFdvHsLsw7CMnB+4cgciWDSqVjuij4mH3TaXnHQB8sz5rHw@mail.gmail.com/ [1] Link: https://lore.kernel.org/r/162610728339.3408253.4604750166391496546.stgit@warthog.procyon.org.uk/ # v2 (no v1) --- fs/afs/write.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/afs/write.c b/fs/afs/write.c index 1ed62e0ccfe57c..c0534697268ef8 100644 --- a/fs/afs/write.c +++ b/fs/afs/write.c @@ -784,7 +784,7 @@ int afs_writepages(struct address_space *mapping, } else if (wbc->range_start == 0 && wbc->range_end == LLONG_MAX) { ret = afs_writepages_region(mapping, wbc, 0, LLONG_MAX, &next); if (wbc->nr_to_write > 0 && ret == 0) - mapping->writeback_index = next; + mapping->writeback_index = next / PAGE_SIZE; } else { ret = afs_writepages_region(mapping, wbc, wbc->range_start, wbc->range_end, &next); From b428081282f85db8a0d4ae6206a8c39db9c8341b Mon Sep 17 00:00:00 2001 From: Jiapeng Chong Date: Thu, 29 Apr 2021 18:18:12 +0800 Subject: [PATCH 623/714] afs: Remove redundant assignment to ret Variable ret is set to -ENOENT and -ENOMEM but this value is never read as it is overwritten or not used later on, hence it is a redundant assignment and can be removed. Cleans up the following clang-analyzer warning: fs/afs/dir.c:2014:4: warning: Value stored to 'ret' is never read [clang-analyzer-deadcode.DeadStores]. fs/afs/dir.c:659:2: warning: Value stored to 'ret' is never read [clang-analyzer-deadcode.DeadStores]. [DH made the following modifications: - In afs_rename(), -ENOMEM should be placed in op->error instead of ret, rather than the assignment being removed entirely. afs_put_operation() will pick it up from there and return it. - If afs_sillyrename() fails, its error code should be placed in op->error rather than in ret also. ] Fixes: e49c7b2f6de7 ("afs: Build an abstraction around an "operation" concept") Reported-by: Abaci Robot Signed-off-by: Jiapeng Chong Signed-off-by: David Howells Reviewed-by: Marc Dionne cc: linux-afs@lists.infradead.org Link: https://lore.kernel.org/r/1619691492-83866-1-git-send-email-jiapeng.chong@linux.alibaba.com Link: https://lore.kernel.org/r/162609465444.3133237.7562832521724298900.stgit@warthog.procyon.org.uk/ # v1 Link: https://lore.kernel.org/r/162610729052.3408253.17364333638838151299.stgit@warthog.procyon.org.uk/ # v2 --- fs/afs/dir.c | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) diff --git a/fs/afs/dir.c b/fs/afs/dir.c index 78719f2f567e9f..ac829e63c5704c 100644 --- a/fs/afs/dir.c +++ b/fs/afs/dir.c @@ -656,7 +656,6 @@ static int afs_do_lookup_one(struct inode *dir, struct dentry *dentry, return ret; } - ret = -ENOENT; if (!cookie.found) { _leave(" = -ENOENT [not found]"); return -ENOENT; @@ -2020,17 +2019,20 @@ static int afs_rename(struct user_namespace *mnt_userns, struct inode *old_dir, if (d_count(new_dentry) > 2) { /* copy the target dentry's name */ - ret = -ENOMEM; op->rename.tmp = d_alloc(new_dentry->d_parent, &new_dentry->d_name); - if (!op->rename.tmp) + if (!op->rename.tmp) { + op->error = -ENOMEM; goto error; + } ret = afs_sillyrename(new_dvnode, AFS_FS_I(d_inode(new_dentry)), new_dentry, op->key); - if (ret) + if (ret) { + op->error = ret; goto error; + } op->dentry_2 = op->rename.tmp; op->rename.rehash = NULL; From 4afa0c22eed33cfe0c590742387f0d16f32412f3 Mon Sep 17 00:00:00 2001 From: Peter Ujfalusi Date: Tue, 13 Jul 2021 12:34:38 +0300 Subject: [PATCH 624/714] driver core: auxiliary bus: Fix memory leak when driver_register() fail If driver_register() returns with error we need to free the memory allocated for auxdrv->driver.name before returning from __auxiliary_driver_register() Fixes: 7de3697e9cbd4 ("Add auxiliary bus support") Reviewed-by: Dan Williams Cc: stable Signed-off-by: Peter Ujfalusi Link: https://lore.kernel.org/r/20210713093438.3173-1-peter.ujfalusi@linux.intel.com Signed-off-by: Greg Kroah-Hartman --- drivers/base/auxiliary.c | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/drivers/base/auxiliary.c b/drivers/base/auxiliary.c index adc199dfba3cb3..6a30264ab2ba12 100644 --- a/drivers/base/auxiliary.c +++ b/drivers/base/auxiliary.c @@ -231,6 +231,8 @@ EXPORT_SYMBOL_GPL(auxiliary_find_device); int __auxiliary_driver_register(struct auxiliary_driver *auxdrv, struct module *owner, const char *modname) { + int ret; + if (WARN_ON(!auxdrv->probe) || WARN_ON(!auxdrv->id_table)) return -EINVAL; @@ -246,7 +248,11 @@ int __auxiliary_driver_register(struct auxiliary_driver *auxdrv, auxdrv->driver.bus = &auxiliary_bus_type; auxdrv->driver.mod_name = modname; - return driver_register(&auxdrv->driver); + ret = driver_register(&auxdrv->driver); + if (ret) + kfree(auxdrv->driver.name); + + return ret; } EXPORT_SYMBOL_GPL(__auxiliary_driver_register); From e9a72f874d5b95cef0765bafc56005a50f72c5fe Mon Sep 17 00:00:00 2001 From: Sayanta Pattanayak Date: Tue, 20 Jul 2021 17:17:40 +0100 Subject: [PATCH 625/714] r8169: Avoid duplicate sysfs entry creation error When registering the MDIO bus for a r8169 device, we use the PCI bus/device specifier as a (seemingly) unique device identifier. However the very same BDF number can be used on another PCI segment, which makes the driver fail probing: [ 27.544136] r8169 0002:07:00.0: enabling device (0000 -> 0003) [ 27.559734] sysfs: cannot create duplicate filename '/class/mdio_bus/r8169-700' .... [ 27.684858] libphy: mii_bus r8169-700 failed to register [ 27.695602] r8169: probe of 0002:07:00.0 failed with error -22 Add the segment number to the device name to make it more unique. This fixes operation on ARM N1SDP boards, with two boards connected together to form an SMP system, and all on-board devices showing up twice, just on different PCI segments. A similar issue would occur on large systems with many PCI slots and multiple RTL8169 NICs. Fixes: f1e911d5d0dfd ("r8169: add basic phylib support") Signed-off-by: Sayanta Pattanayak [Andre: expand commit message, use pci_domain_nr()] Signed-off-by: Andre Przywara Acked-by: Heiner Kallweit Signed-off-by: David S. Miller --- drivers/net/ethernet/realtek/r8169_main.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/net/ethernet/realtek/r8169_main.c b/drivers/net/ethernet/realtek/r8169_main.c index f744557c33a3f2..c7af5bc3b8afff 100644 --- a/drivers/net/ethernet/realtek/r8169_main.c +++ b/drivers/net/ethernet/realtek/r8169_main.c @@ -5084,7 +5084,8 @@ static int r8169_mdio_register(struct rtl8169_private *tp) new_bus->priv = tp; new_bus->parent = &pdev->dev; new_bus->irq[0] = PHY_MAC_INTERRUPT; - snprintf(new_bus->id, MII_BUS_ID_SIZE, "r8169-%x", pci_dev_id(pdev)); + snprintf(new_bus->id, MII_BUS_ID_SIZE, "r8169-%x-%x", + pci_domain_nr(pdev->bus), pci_dev_id(pdev)); new_bus->read = r8169_mdio_read_reg; new_bus->write = r8169_mdio_write_reg; From 0077a50082729c3f9ea2836f59e35d9b7dacfb12 Mon Sep 17 00:00:00 2001 From: Ilya Dryomov Date: Wed, 21 Jul 2021 12:16:26 +0200 Subject: [PATCH 626/714] rbd: resurrect setting of disk->private_data in rbd_init_disk() rbd_open() and rbd_release() expect that disk->private_data is set to rbd_dev. Otherwise we hit a NULL pointer dereference when mapping the image. URL: https://tracker.ceph.com/issues/51759 Fixes: 195b1956b85b ("rbd: use blk_mq_alloc_disk and blk_cleanup_disk") Signed-off-by: Ilya Dryomov Reviewed-by: Christoph Hellwig --- drivers/block/rbd.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/block/rbd.c b/drivers/block/rbd.c index 784797fa9a537c..90b947c9640226 100644 --- a/drivers/block/rbd.c +++ b/drivers/block/rbd.c @@ -4943,6 +4943,7 @@ static int rbd_init_disk(struct rbd_device *rbd_dev) disk->minors = RBD_MINORS_PER_MAJOR; } disk->fops = &rbd_bd_ops; + disk->private_data = rbd_dev; blk_queue_flag_set(QUEUE_FLAG_NONROT, q); /* QUEUE_FLAG_ADD_RANDOM is off by default for blk-mq */ From aaeb7bb061be545251606f4d9c82d710ca2a7c8e Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Wed, 21 Jul 2021 10:00:11 +0200 Subject: [PATCH 627/714] nvme: set the PRACT bit when using Write Zeroes with T10 PI When using Write Zeroes on a namespace that has protection information enabled they behavior without the PRACT bit counter-intuitive and will generally lead to validation failures when reading the written blocks. Fix this by always setting the PRACT bit that generates matching PI data on the fly. Fixes: 6e02318eaea5 ("nvme: add support for the Write Zeroes command") Signed-off-by: Christoph Hellwig Reviewed-by: Keith Busch Reviewed-by: Martin K. Petersen --- drivers/nvme/host/core.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/drivers/nvme/host/core.c b/drivers/nvme/host/core.c index 17c05a4595f0c4..dfd9dec0c1f605 100644 --- a/drivers/nvme/host/core.c +++ b/drivers/nvme/host/core.c @@ -900,7 +900,10 @@ static inline blk_status_t nvme_setup_write_zeroes(struct nvme_ns *ns, cpu_to_le64(nvme_sect_to_lba(ns, blk_rq_pos(req))); cmnd->write_zeroes.length = cpu_to_le16((blk_rq_bytes(req) >> ns->lba_shift) - 1); - cmnd->write_zeroes.control = 0; + if (nvme_ns_has_pi(ns)) + cmnd->write_zeroes.control = cpu_to_le16(NVME_RW_PRINFO_PRACT); + else + cmnd->write_zeroes.control = 0; return BLK_STS_OK; } From e64daad660a0c9ace3acdc57099fffe5ed83f977 Mon Sep 17 00:00:00 2001 From: Adrian Hunter Date: Fri, 16 Jul 2021 14:44:07 +0300 Subject: [PATCH 628/714] driver core: Prevent warning when removing a device link from unregistered consumer sysfs_remove_link() causes a warning if the parent directory does not exist. That can happen if the device link consumer has not been registered. So do not attempt sysfs_remove_link() in that case. Fixes: 287905e68dd29 ("driver core: Expose device link details in sysfs") Signed-off-by: Adrian Hunter Cc: stable@vger.kernel.org # 5.9+ Reviewed-by: Rafael J. Wysocki Link: https://lore.kernel.org/r/20210716114408.17320-2-adrian.hunter@intel.com Signed-off-by: Greg Kroah-Hartman --- drivers/base/core.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/drivers/base/core.c b/drivers/base/core.c index cadcade658253c..f6360490a4a30d 100644 --- a/drivers/base/core.c +++ b/drivers/base/core.c @@ -574,8 +574,10 @@ static void devlink_remove_symlinks(struct device *dev, return; } - snprintf(buf, len, "supplier:%s:%s", dev_bus_name(sup), dev_name(sup)); - sysfs_remove_link(&con->kobj, buf); + if (device_is_registered(con)) { + snprintf(buf, len, "supplier:%s:%s", dev_bus_name(sup), dev_name(sup)); + sysfs_remove_link(&con->kobj, buf); + } snprintf(buf, len, "consumer:%s:%s", dev_bus_name(con), dev_name(con)); sysfs_remove_link(&sup->kobj, buf); kfree(buf); From 58acd10092268831e49de279446c314727101292 Mon Sep 17 00:00:00 2001 From: Xin Long Date: Tue, 20 Jul 2021 16:07:01 -0400 Subject: [PATCH 629/714] sctp: update active_key for asoc when old key is being replaced syzbot reported a call trace: BUG: KASAN: use-after-free in sctp_auth_shkey_hold+0x22/0xa0 net/sctp/auth.c:112 Call Trace: sctp_auth_shkey_hold+0x22/0xa0 net/sctp/auth.c:112 sctp_set_owner_w net/sctp/socket.c:131 [inline] sctp_sendmsg_to_asoc+0x152e/0x2180 net/sctp/socket.c:1865 sctp_sendmsg+0x103b/0x1d30 net/sctp/socket.c:2027 inet_sendmsg+0x99/0xe0 net/ipv4/af_inet.c:821 sock_sendmsg_nosec net/socket.c:703 [inline] sock_sendmsg+0xcf/0x120 net/socket.c:723 This is an use-after-free issue caused by not updating asoc->shkey after it was replaced in the key list asoc->endpoint_shared_keys, and the old key was freed. This patch is to fix by also updating active_key for asoc when old key is being replaced with a new one. Note that this issue doesn't exist in sctp_auth_del_key_id(), as it's not allowed to delete the active_key from the asoc. Fixes: 1b1e0bc99474 ("sctp: add refcnt support for sh_key") Reported-by: syzbot+b774577370208727d12b@syzkaller.appspotmail.com Signed-off-by: Xin Long Signed-off-by: David S. Miller --- net/sctp/auth.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/net/sctp/auth.c b/net/sctp/auth.c index 6f8319b828b0d1..fe74c5f956303f 100644 --- a/net/sctp/auth.c +++ b/net/sctp/auth.c @@ -860,6 +860,8 @@ int sctp_auth_set_key(struct sctp_endpoint *ep, if (replace) { list_del_init(&shkey->key_list); sctp_auth_shkey_release(shkey); + if (asoc && asoc->active_key_id == auth_key->sca_keynumber) + sctp_auth_asoc_init_active_key(asoc, GFP_KERNEL); } list_add(&cur_key->key_list, sh_keys); From 9bfce73c8921c92a9565562e6e7d458d37b7ce80 Mon Sep 17 00:00:00 2001 From: Vadim Fedorenko Date: Tue, 20 Jul 2021 23:35:28 +0300 Subject: [PATCH 630/714] udp: check encap socket in __udp_lib_err Commit d26796ae5894 ("udp: check udp sock encap_type in __udp_lib_err") added checks for encapsulated sockets but it broke cases when there is no implementation of encap_err_lookup for encapsulation, i.e. ESP in UDP encapsulation. Fix it by calling encap_err_lookup only if socket implements this method otherwise treat it as legal socket. Fixes: d26796ae5894 ("udp: check udp sock encap_type in __udp_lib_err") Signed-off-by: Vadim Fedorenko Reviewed-by: Xin Long Signed-off-by: David S. Miller --- net/ipv4/udp.c | 25 +++++++++++++++++++------ net/ipv6/udp.c | 25 +++++++++++++++++++------ 2 files changed, 38 insertions(+), 12 deletions(-) diff --git a/net/ipv4/udp.c b/net/ipv4/udp.c index 62cd4cd52e8481..1a742b710e543e 100644 --- a/net/ipv4/udp.c +++ b/net/ipv4/udp.c @@ -645,10 +645,12 @@ static struct sock *__udp4_lib_err_encap(struct net *net, const struct iphdr *iph, struct udphdr *uh, struct udp_table *udptable, + struct sock *sk, struct sk_buff *skb, u32 info) { + int (*lookup)(struct sock *sk, struct sk_buff *skb); int network_offset, transport_offset; - struct sock *sk; + struct udp_sock *up; network_offset = skb_network_offset(skb); transport_offset = skb_transport_offset(skb); @@ -659,18 +661,28 @@ static struct sock *__udp4_lib_err_encap(struct net *net, /* Transport header needs to point to the UDP header */ skb_set_transport_header(skb, iph->ihl << 2); + if (sk) { + up = udp_sk(sk); + + lookup = READ_ONCE(up->encap_err_lookup); + if (lookup && lookup(sk, skb)) + sk = NULL; + + goto out; + } + sk = __udp4_lib_lookup(net, iph->daddr, uh->source, iph->saddr, uh->dest, skb->dev->ifindex, 0, udptable, NULL); if (sk) { - int (*lookup)(struct sock *sk, struct sk_buff *skb); - struct udp_sock *up = udp_sk(sk); + up = udp_sk(sk); lookup = READ_ONCE(up->encap_err_lookup); if (!lookup || lookup(sk, skb)) sk = NULL; } +out: if (!sk) sk = ERR_PTR(__udp4_lib_err_encap_no_sk(skb, info)); @@ -707,15 +719,16 @@ int __udp4_lib_err(struct sk_buff *skb, u32 info, struct udp_table *udptable) sk = __udp4_lib_lookup(net, iph->daddr, uh->dest, iph->saddr, uh->source, skb->dev->ifindex, inet_sdif(skb), udptable, NULL); + if (!sk || udp_sk(sk)->encap_type) { /* No socket for error: try tunnels before discarding */ - sk = ERR_PTR(-ENOENT); if (static_branch_unlikely(&udp_encap_needed_key)) { - sk = __udp4_lib_err_encap(net, iph, uh, udptable, skb, + sk = __udp4_lib_err_encap(net, iph, uh, udptable, sk, skb, info); if (!sk) return 0; - } + } else + sk = ERR_PTR(-ENOENT); if (IS_ERR(sk)) { __ICMP_INC_STATS(net, ICMP_MIB_INERRORS); diff --git a/net/ipv6/udp.c b/net/ipv6/udp.c index 0cc7ba531b3415..c5e15e94bb0042 100644 --- a/net/ipv6/udp.c +++ b/net/ipv6/udp.c @@ -502,12 +502,14 @@ static struct sock *__udp6_lib_err_encap(struct net *net, const struct ipv6hdr *hdr, int offset, struct udphdr *uh, struct udp_table *udptable, + struct sock *sk, struct sk_buff *skb, struct inet6_skb_parm *opt, u8 type, u8 code, __be32 info) { + int (*lookup)(struct sock *sk, struct sk_buff *skb); int network_offset, transport_offset; - struct sock *sk; + struct udp_sock *up; network_offset = skb_network_offset(skb); transport_offset = skb_transport_offset(skb); @@ -518,18 +520,28 @@ static struct sock *__udp6_lib_err_encap(struct net *net, /* Transport header needs to point to the UDP header */ skb_set_transport_header(skb, offset); + if (sk) { + up = udp_sk(sk); + + lookup = READ_ONCE(up->encap_err_lookup); + if (lookup && lookup(sk, skb)) + sk = NULL; + + goto out; + } + sk = __udp6_lib_lookup(net, &hdr->daddr, uh->source, &hdr->saddr, uh->dest, inet6_iif(skb), 0, udptable, skb); if (sk) { - int (*lookup)(struct sock *sk, struct sk_buff *skb); - struct udp_sock *up = udp_sk(sk); + up = udp_sk(sk); lookup = READ_ONCE(up->encap_err_lookup); if (!lookup || lookup(sk, skb)) sk = NULL; } +out: if (!sk) { sk = ERR_PTR(__udp6_lib_err_encap_no_sk(skb, opt, type, code, offset, info)); @@ -558,16 +570,17 @@ int __udp6_lib_err(struct sk_buff *skb, struct inet6_skb_parm *opt, sk = __udp6_lib_lookup(net, daddr, uh->dest, saddr, uh->source, inet6_iif(skb), inet6_sdif(skb), udptable, NULL); + if (!sk || udp_sk(sk)->encap_type) { /* No socket for error: try tunnels before discarding */ - sk = ERR_PTR(-ENOENT); if (static_branch_unlikely(&udpv6_encap_needed_key)) { sk = __udp6_lib_err_encap(net, hdr, offset, uh, - udptable, skb, + udptable, sk, skb, opt, type, code, info); if (!sk) return 0; - } + } else + sk = ERR_PTR(-ENOENT); if (IS_ERR(sk)) { __ICMP6_INC_STATS(net, __in6_dev_get(skb->dev), From 6e0b6528d783b2b87bd9e1bea97cf4dac87540d7 Mon Sep 17 00:00:00 2001 From: Jason Ekstrand Date: Tue, 20 Jul 2021 13:21:08 -0500 Subject: [PATCH 631/714] drm/i915: Correct the docs for intel_engine_cmd_parser In 93b713304188 ("drm/i915: Revert "drm/i915/gem: Asynchronous cmdparser""), the parameters to intel_engine_cmd_parser() were altered without updating the docs, causing Fi.CI.DOCS to start failing. Fixes: c9d9fdbc108a ("drm/i915: Revert "drm/i915/gem: Asynchronous cmdparser"") Signed-off-by: Jason Ekstrand Reviewed-by: Rodrigo Vivi Link: https://patchwork.freedesktop.org/patch/msgid/20210720182108.2761496-1-jason@jlekstrand.net Signed-off-by: Rodrigo Vivi [Added 'Fixes:' tag and corrected the hash for the ancestor] (cherry picked from commit 15eb083bdb561bb4862cd04cd0523e55483e877e) Signed-off-by: Rodrigo Vivi [Updated Fixes tag to match fixes branch] --- drivers/gpu/drm/i915/i915_cmd_parser.c | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_cmd_parser.c b/drivers/gpu/drm/i915/i915_cmd_parser.c index 00ec618d015900..a3b4d99d64b91b 100644 --- a/drivers/gpu/drm/i915/i915_cmd_parser.c +++ b/drivers/gpu/drm/i915/i915_cmd_parser.c @@ -1416,9 +1416,7 @@ static unsigned long *alloc_whitelist(u32 batch_length) * @batch_offset: byte offset in the batch at which execution starts * @batch_length: length of the commands in batch_obj * @shadow: validated copy of the batch buffer in question - * @jump_whitelist: buffer preallocated with intel_engine_cmd_parser_alloc_jump_whitelist() - * @shadow_map: mapping to @shadow vma - * @batch_map: mapping to @batch vma + * @trampoline: true if we need to trampoline into privileged execution * * Parses the specified batch buffer looking for privilege violations as * described in the overview. From ece1278a9b81bdfc088f087f8372a072b7010956 Mon Sep 17 00:00:00 2001 From: Vadim Fedorenko Date: Tue, 20 Jul 2021 23:35:29 +0300 Subject: [PATCH 632/714] selftests: net: add ESP-in-UDP PMTU test The case of ESP in UDP encapsulation was not covered before. Add cases of local changes of MTU and difference on routed path. Signed-off-by: Vadim Fedorenko Signed-off-by: David S. Miller --- tools/testing/selftests/net/nettest.c | 55 ++++++- tools/testing/selftests/net/pmtu.sh | 212 +++++++++++++++++++++++++- 2 files changed, 260 insertions(+), 7 deletions(-) diff --git a/tools/testing/selftests/net/nettest.c b/tools/testing/selftests/net/nettest.c index 6365c7fd1262ad..bd6288302094b5 100644 --- a/tools/testing/selftests/net/nettest.c +++ b/tools/testing/selftests/net/nettest.c @@ -11,9 +11,11 @@ #include #include #include +#include #include #include #include +#include #include #include #include @@ -27,6 +29,10 @@ #include #include +#include +#include +#include + #ifndef IPV6_UNICAST_IF #define IPV6_UNICAST_IF 76 #endif @@ -114,6 +120,9 @@ struct sock_args { struct in_addr in; struct in6_addr in6; } expected_raddr; + + /* ESP in UDP encap test */ + int use_xfrm; }; static int server_mode; @@ -1346,6 +1355,41 @@ static int bind_socket(int sd, struct sock_args *args) return 0; } +static int config_xfrm_policy(int sd, struct sock_args *args) +{ + struct xfrm_userpolicy_info policy = {}; + int type = UDP_ENCAP_ESPINUDP; + int xfrm_af = IP_XFRM_POLICY; + int level = SOL_IP; + + if (args->type != SOCK_DGRAM) { + log_error("Invalid socket type. Only DGRAM could be used for XFRM\n"); + return 1; + } + + policy.action = XFRM_POLICY_ALLOW; + policy.sel.family = args->version; + if (args->version == AF_INET6) { + xfrm_af = IPV6_XFRM_POLICY; + level = SOL_IPV6; + } + + policy.dir = XFRM_POLICY_OUT; + if (setsockopt(sd, level, xfrm_af, &policy, sizeof(policy)) < 0) + return 1; + + policy.dir = XFRM_POLICY_IN; + if (setsockopt(sd, level, xfrm_af, &policy, sizeof(policy)) < 0) + return 1; + + if (setsockopt(sd, IPPROTO_UDP, UDP_ENCAP, &type, sizeof(type)) < 0) { + log_err_errno("Failed to set xfrm encap"); + return 1; + } + + return 0; +} + static int lsock_init(struct sock_args *args) { long flags; @@ -1389,6 +1433,11 @@ static int lsock_init(struct sock_args *args) if (fcntl(sd, F_SETFD, FD_CLOEXEC) < 0) log_err_errno("Failed to set close-on-exec flag"); + if (args->use_xfrm && config_xfrm_policy(sd, args)) { + log_err_errno("Failed to set xfrm policy"); + goto err; + } + out: return sd; @@ -1772,7 +1821,7 @@ static int ipc_parent(int cpid, int fd, struct sock_args *args) return client_status; } -#define GETOPT_STR "sr:l:c:p:t:g:P:DRn:M:X:m:d:I:BN:O:SCi6L:0:1:2:3:Fbq" +#define GETOPT_STR "sr:l:c:p:t:g:P:DRn:M:X:m:d:I:BN:O:SCi6xL:0:1:2:3:Fbq" static void print_usage(char *prog) { @@ -1795,6 +1844,7 @@ static void print_usage(char *prog) " -D|R datagram (D) / raw (R) socket (default stream)\n" " -l addr local address to bind to in server mode\n" " -c addr local address to bind to in client mode\n" + " -x configure XFRM policy on socket\n" "\n" " -d dev bind socket to given device name\n" " -I dev bind socket to given device name - server mode\n" @@ -1966,6 +2016,9 @@ int main(int argc, char *argv[]) case 'q': quiet = 1; break; + case 'x': + args.use_xfrm = 1; + break; default: print_usage(argv[0]); return 1; diff --git a/tools/testing/selftests/net/pmtu.sh b/tools/testing/selftests/net/pmtu.sh index 64cd2e23c5687a..543ad7513a8e9f 100755 --- a/tools/testing/selftests/net/pmtu.sh +++ b/tools/testing/selftests/net/pmtu.sh @@ -118,6 +118,16 @@ # below for IPv6 doesn't apply here, because, on IPv4, administrative MTU # changes alone won't affect PMTU # +# - pmtu_vti4_udp_exception +# Same as pmtu_vti4_exception, but using ESP-in-UDP +# +# - pmtu_vti4_udp_routed_exception +# Set up vti tunnel on top of veth connected through routing namespace and +# add xfrm states and policies with ESP-in-UDP encapsulation. Check that +# route exception is not created if link layer MTU is not exceeded, then +# lower MTU on second part of routed environment and check that exception +# is created with the expected PMTU. +# # - pmtu_vti6_exception # Set up vti6 tunnel on top of veth, with xfrm states and policies, in two # namespaces with matching endpoints. Check that route exception is @@ -125,6 +135,13 @@ # decrease and increase MTU of tunnel, checking that route exception PMTU # changes accordingly # +# - pmtu_vti6_udp_exception +# Same as pmtu_vti6_exception, but using ESP-in-UDP +# +# - pmtu_vti6_udp_routed_exception +# Same as pmtu_vti6_udp_routed_exception but with routing between vti +# endpoints +# # - pmtu_vti4_default_mtu # Set up vti4 tunnel on top of veth, in two namespaces with matching # endpoints. Check that MTU assigned to vti interface is the MTU of the @@ -224,6 +241,10 @@ tests=" pmtu_ipv6_ipv6_exception IPv6 over IPv6: PMTU exceptions 1 pmtu_vti6_exception vti6: PMTU exceptions 0 pmtu_vti4_exception vti4: PMTU exceptions 0 + pmtu_vti6_udp_exception vti6: PMTU exceptions (ESP-in-UDP) 0 + pmtu_vti4_udp_exception vti4: PMTU exceptions (ESP-in-UDP) 0 + pmtu_vti6_udp_routed_exception vti6: PMTU exceptions, routed (ESP-in-UDP) 0 + pmtu_vti4_udp_routed_exception vti4: PMTU exceptions, routed (ESP-in-UDP) 0 pmtu_vti4_default_mtu vti4: default MTU assignment 0 pmtu_vti6_default_mtu vti6: default MTU assignment 0 pmtu_vti4_link_add_mtu vti4: MTU setting on link creation 0 @@ -246,7 +267,6 @@ ns_b="ip netns exec ${NS_B}" ns_c="ip netns exec ${NS_C}" ns_r1="ip netns exec ${NS_R1}" ns_r2="ip netns exec ${NS_R2}" - # Addressing and routing for tests with routers: four network segments, with # index SEGMENT between 1 and 4, a common prefix (PREFIX4 or PREFIX6) and an # identifier ID, which is 1 for hosts (A and B), 2 for routers (R1 and R2). @@ -279,7 +299,6 @@ routes=" A ${prefix6}:${b_r2}::1 ${prefix6}:${a_r2}::2 B default ${prefix6}:${b_r1}::2 " - USE_NH="no" # ns family nh id destination gateway nexthops=" @@ -326,6 +345,7 @@ dummy6_mask="64" err_buf= tcpdump_pids= +nettest_pids= err() { err_buf="${err_buf}${1} @@ -548,6 +568,14 @@ setup_vti6() { setup_vti 6 ${veth6_a_addr} ${veth6_b_addr} ${tunnel6_a_addr} ${tunnel6_b_addr} ${tunnel6_mask} } +setup_vti4routed() { + setup_vti 4 ${prefix4}.${a_r1}.1 ${prefix4}.${b_r1}.1 ${tunnel4_a_addr} ${tunnel4_b_addr} ${tunnel4_mask} +} + +setup_vti6routed() { + setup_vti 6 ${prefix6}:${a_r1}::1 ${prefix6}:${b_r1}::1 ${tunnel6_a_addr} ${tunnel6_b_addr} ${tunnel6_mask} +} + setup_vxlan_or_geneve() { type="${1}" a_addr="${2}" @@ -619,18 +647,36 @@ setup_xfrm() { proto=${1} veth_a_addr="${2}" veth_b_addr="${3}" + encap=${4} - run_cmd ${ns_a} ip -${proto} xfrm state add src ${veth_a_addr} dst ${veth_b_addr} spi 0x1000 proto esp aead 'rfc4106(gcm(aes))' 0x0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f 128 mode tunnel || return 1 - run_cmd ${ns_a} ip -${proto} xfrm state add src ${veth_b_addr} dst ${veth_a_addr} spi 0x1001 proto esp aead 'rfc4106(gcm(aes))' 0x0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f 128 mode tunnel + run_cmd ${ns_a} ip -${proto} xfrm state add src ${veth_a_addr} dst ${veth_b_addr} spi 0x1000 proto esp aead 'rfc4106(gcm(aes))' 0x0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f 128 mode tunnel ${encap} || return 1 + run_cmd ${ns_a} ip -${proto} xfrm state add src ${veth_b_addr} dst ${veth_a_addr} spi 0x1001 proto esp aead 'rfc4106(gcm(aes))' 0x0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f 128 mode tunnel ${encap} run_cmd ${ns_a} ip -${proto} xfrm policy add dir out mark 10 tmpl src ${veth_a_addr} dst ${veth_b_addr} proto esp mode tunnel run_cmd ${ns_a} ip -${proto} xfrm policy add dir in mark 10 tmpl src ${veth_b_addr} dst ${veth_a_addr} proto esp mode tunnel - run_cmd ${ns_b} ip -${proto} xfrm state add src ${veth_a_addr} dst ${veth_b_addr} spi 0x1000 proto esp aead 'rfc4106(gcm(aes))' 0x0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f 128 mode tunnel - run_cmd ${ns_b} ip -${proto} xfrm state add src ${veth_b_addr} dst ${veth_a_addr} spi 0x1001 proto esp aead 'rfc4106(gcm(aes))' 0x0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f 128 mode tunnel + run_cmd ${ns_b} ip -${proto} xfrm state add src ${veth_a_addr} dst ${veth_b_addr} spi 0x1000 proto esp aead 'rfc4106(gcm(aes))' 0x0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f 128 mode tunnel ${encap} + run_cmd ${ns_b} ip -${proto} xfrm state add src ${veth_b_addr} dst ${veth_a_addr} spi 0x1001 proto esp aead 'rfc4106(gcm(aes))' 0x0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f 128 mode tunnel ${encap} run_cmd ${ns_b} ip -${proto} xfrm policy add dir out mark 10 tmpl src ${veth_b_addr} dst ${veth_a_addr} proto esp mode tunnel run_cmd ${ns_b} ip -${proto} xfrm policy add dir in mark 10 tmpl src ${veth_a_addr} dst ${veth_b_addr} proto esp mode tunnel } +setup_nettest_xfrm() { + which nettest >/dev/null + if [ $? -ne 0 ]; then + echo "'nettest' command not found; skipping tests" + return 1 + fi + + [ ${1} -eq 6 ] && proto="-6" || proto="" + port=${2} + + run_cmd ${ns_a} nettest ${proto} -q -D -s -x -p ${port} -t 5 & + nettest_pids="${nettest_pids} $!" + + run_cmd ${ns_b} nettest ${proto} -q -D -s -x -p ${port} -t 5 & + nettest_pids="${nettest_pids} $!" +} + setup_xfrm4() { setup_xfrm 4 ${veth4_a_addr} ${veth4_b_addr} } @@ -639,6 +685,26 @@ setup_xfrm6() { setup_xfrm 6 ${veth6_a_addr} ${veth6_b_addr} } +setup_xfrm4udp() { + setup_xfrm 4 ${veth4_a_addr} ${veth4_b_addr} "encap espinudp 4500 4500 0.0.0.0" + setup_nettest_xfrm 4 4500 +} + +setup_xfrm6udp() { + setup_xfrm 6 ${veth6_a_addr} ${veth6_b_addr} "encap espinudp 4500 4500 0.0.0.0" + setup_nettest_xfrm 6 4500 +} + +setup_xfrm4udprouted() { + setup_xfrm 4 ${prefix4}.${a_r1}.1 ${prefix4}.${b_r1}.1 "encap espinudp 4500 4500 0.0.0.0" + setup_nettest_xfrm 4 4500 +} + +setup_xfrm6udprouted() { + setup_xfrm 6 ${prefix6}:${a_r1}::1 ${prefix6}:${b_r1}::1 "encap espinudp 4500 4500 0.0.0.0" + setup_nettest_xfrm 6 4500 +} + setup_routing_old() { for i in ${routes}; do [ "${ns}" = "" ] && ns="${i}" && continue @@ -823,6 +889,11 @@ cleanup() { done tcpdump_pids= + for pid in ${nettest_pids}; do + kill ${pid} + done + nettest_pids= + for n in ${NS_A} ${NS_B} ${NS_C} ${NS_R1} ${NS_R2}; do ip netns del ${n} 2> /dev/null done @@ -1432,6 +1503,135 @@ test_pmtu_vti6_exception() { return ${fail} } +test_pmtu_vti4_udp_exception() { + setup namespaces veth vti4 xfrm4udp || return $ksft_skip + trace "${ns_a}" veth_a "${ns_b}" veth_b \ + "${ns_a}" vti4_a "${ns_b}" vti4_b + + veth_mtu=1500 + vti_mtu=$((veth_mtu - 20)) + + # UDP SPI SN IV ICV pad length next header + esp_payload_rfc4106=$((vti_mtu - 8 - 4 - 4 - 8 - 16 - 1 - 1)) + ping_payload=$((esp_payload_rfc4106 - 28)) + + mtu "${ns_a}" veth_a ${veth_mtu} + mtu "${ns_b}" veth_b ${veth_mtu} + mtu "${ns_a}" vti4_a ${vti_mtu} + mtu "${ns_b}" vti4_b ${vti_mtu} + + # Send DF packet without exceeding link layer MTU, check that no + # exception is created + run_cmd ${ns_a} ping -q -M want -i 0.1 -w 1 -s ${ping_payload} ${tunnel4_b_addr} + pmtu="$(route_get_dst_pmtu_from_exception "${ns_a}" ${tunnel4_b_addr})" + check_pmtu_value "" "${pmtu}" "sending packet smaller than PMTU (IP payload length ${esp_payload_rfc4106})" || return 1 + + # Now exceed link layer MTU by one byte, check that exception is created + # with the right PMTU value + run_cmd ${ns_a} ping -q -M want -i 0.1 -w 1 -s $((ping_payload + 1)) ${tunnel4_b_addr} + pmtu="$(route_get_dst_pmtu_from_exception "${ns_a}" ${tunnel4_b_addr})" + check_pmtu_value "${esp_payload_rfc4106}" "${pmtu}" "exceeding PMTU (IP payload length $((esp_payload_rfc4106 + 1)))" +} + +test_pmtu_vti6_udp_exception() { + setup namespaces veth vti6 xfrm6udp || return $ksft_skip + trace "${ns_a}" veth_a "${ns_b}" veth_b \ + "${ns_a}" vti6_a "${ns_b}" vti6_b + fail=0 + + # Create route exception by exceeding link layer MTU + mtu "${ns_a}" veth_a 4000 + mtu "${ns_b}" veth_b 4000 + mtu "${ns_a}" vti6_a 5000 + mtu "${ns_b}" vti6_b 5000 + run_cmd ${ns_a} ${ping6} -q -i 0.1 -w 1 -s 60000 ${tunnel6_b_addr} + + # Check that exception was created + pmtu="$(route_get_dst_pmtu_from_exception "${ns_a}" ${tunnel6_b_addr})" + check_pmtu_value any "${pmtu}" "creating tunnel exceeding link layer MTU" || return 1 + + # Decrease tunnel MTU, check for PMTU decrease in route exception + mtu "${ns_a}" vti6_a 3000 + pmtu="$(route_get_dst_pmtu_from_exception "${ns_a}" ${tunnel6_b_addr})" + check_pmtu_value "3000" "${pmtu}" "decreasing tunnel MTU" || fail=1 + + # Increase tunnel MTU, check for PMTU increase in route exception + mtu "${ns_a}" vti6_a 9000 + pmtu="$(route_get_dst_pmtu_from_exception "${ns_a}" ${tunnel6_b_addr})" + check_pmtu_value "9000" "${pmtu}" "increasing tunnel MTU" || fail=1 + + return ${fail} +} + +test_pmtu_vti4_udp_routed_exception() { + setup namespaces routing vti4routed xfrm4udprouted || return $ksft_skip + trace "${ns_a}" veth_A-R1 "${ns_b}" veth_B-R1 \ + "${ns_a}" vti4_a "${ns_b}" vti4_b + + veth_mtu=1500 + vti_mtu=$((veth_mtu - 20)) + + # UDP SPI SN IV ICV pad length next header + esp_payload_rfc4106=$((vti_mtu - 8 - 4 - 4 - 8 - 16 - 1 - 1)) + ping_payload=$((esp_payload_rfc4106 - 28)) + + mtu "${ns_a}" veth_A-R1 ${veth_mtu} + mtu "${ns_r1}" veth_R1-A ${veth_mtu} + mtu "${ns_b}" veth_B-R1 ${veth_mtu} + mtu "${ns_r1}" veth_R1-B ${veth_mtu} + + mtu "${ns_a}" vti4_a ${vti_mtu} + mtu "${ns_b}" vti4_b ${vti_mtu} + + # Send DF packet without exceeding link layer MTU, check that no + # exception is created + run_cmd ${ns_a} ping -q -M want -i 0.1 -w 1 -s ${ping_payload} ${tunnel4_b_addr} + pmtu="$(route_get_dst_pmtu_from_exception "${ns_a}" ${tunnel4_b_addr})" + check_pmtu_value "" "${pmtu}" "sending packet smaller than PMTU (IP payload length ${esp_payload_rfc4106})" || return 1 + + # Now decrease link layer MTU by 8 bytes on R1, check that exception is created + # with the right PMTU value + mtu "${ns_r1}" veth_R1-B $((veth_mtu - 8)) + run_cmd ${ns_a} ping -q -M want -i 0.1 -w 1 -s $((ping_payload)) ${tunnel4_b_addr} + pmtu="$(route_get_dst_pmtu_from_exception "${ns_a}" ${tunnel4_b_addr})" + check_pmtu_value "$((esp_payload_rfc4106 - 8))" "${pmtu}" "exceeding PMTU (IP payload length $((esp_payload_rfc4106)))" +} + +test_pmtu_vti6_udp_routed_exception() { + setup namespaces routing vti6routed xfrm6udprouted || return $ksft_skip + trace "${ns_a}" veth_A-R1 "${ns_b}" veth_B-R1 \ + "${ns_a}" vti6_a "${ns_b}" vti6_b + + veth_mtu=1500 + vti_mtu=$((veth_mtu - 40)) + + # UDP SPI SN IV ICV pad length next header + esp_payload_rfc4106=$((vti_mtu - 8 - 4 - 4 - 8 - 16 - 1 - 1)) + ping_payload=$((esp_payload_rfc4106 - 48)) + + mtu "${ns_a}" veth_A-R1 ${veth_mtu} + mtu "${ns_r1}" veth_R1-A ${veth_mtu} + mtu "${ns_b}" veth_B-R1 ${veth_mtu} + mtu "${ns_r1}" veth_R1-B ${veth_mtu} + + # mtu "${ns_a}" vti6_a ${vti_mtu} + # mtu "${ns_b}" vti6_b ${vti_mtu} + + run_cmd ${ns_a} ${ping6} -q -M want -i 0.1 -w 1 -s ${ping_payload} ${tunnel6_b_addr} + + # Check that exception was not created + pmtu="$(route_get_dst_pmtu_from_exception "${ns_a}" ${tunnel6_b_addr})" + check_pmtu_value "" "${pmtu}" "sending packet smaller than PMTU (IP payload length ${esp_payload_rfc4106})" || return 1 + + # Now decrease link layer MTU by 8 bytes on R1, check that exception is created + # with the right PMTU value + mtu "${ns_r1}" veth_R1-B $((veth_mtu - 8)) + run_cmd ${ns_a} ${ping6} -q -M want -i 0.1 -w 1 -s $((ping_payload)) ${tunnel6_b_addr} + pmtu="$(route_get_dst_pmtu_from_exception "${ns_a}" ${tunnel6_b_addr})" + check_pmtu_value "$((esp_payload_rfc4106 - 8))" "${pmtu}" "exceeding PMTU (IP payload length $((esp_payload_rfc4106)))" + +} + test_pmtu_vti4_default_mtu() { setup namespaces veth vti4 || return $ksft_skip From f5a11c69b69923a4367d24365ad4dff6d4f3fc42 Mon Sep 17 00:00:00 2001 From: Wei Liu Date: Wed, 21 Jul 2021 15:55:43 +0000 Subject: [PATCH 633/714] Revert "x86/hyperv: fix logical processor creation" This reverts commit 450605c28d571eddca39a65fdbc1338add44c6d9. Signed-off-by: Wei Liu --- arch/x86/kernel/cpu/mshyperv.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/x86/kernel/cpu/mshyperv.c b/arch/x86/kernel/cpu/mshyperv.c index cc8f1773deca74..c890d67a64ad0e 100644 --- a/arch/x86/kernel/cpu/mshyperv.c +++ b/arch/x86/kernel/cpu/mshyperv.c @@ -237,7 +237,7 @@ static void __init hv_smp_prepare_cpus(unsigned int max_cpus) for_each_present_cpu(i) { if (i == 0) continue; - ret = hv_call_add_logical_proc(numa_cpu_node(i), i, i); + ret = hv_call_add_logical_proc(numa_cpu_node(i), i, cpu_physical_id(i)); BUG_ON(ret); } From bb55362bd6976631b662ca712779b6532d8de0a6 Mon Sep 17 00:00:00 2001 From: Sukadev Bhattiprolu Date: Tue, 20 Jul 2021 19:34:39 -0700 Subject: [PATCH 634/714] ibmvnic: Remove the proper scrq flush Commit 65d6470d139a ("ibmvnic: clean pending indirect buffs during reset") intended to remove the call to ibmvnic_tx_scrq_flush() when the ->resetting flag is true and was tested that way. But during the final rebase to net-next, the hunk got applied to a block few lines below (which happened to have the same diff context) and the wrong call to ibmvnic_tx_scrq_flush() got removed. Fix that by removing the correct ibmvnic_tx_scrq_flush() and restoring the one that was incorrectly removed. Fixes: 65d6470d139a ("ibmvnic: clean pending indirect buffs during reset") Reported-by: Dany Madden Signed-off-by: Sukadev Bhattiprolu Signed-off-by: David S. Miller --- drivers/net/ethernet/ibm/ibmvnic.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/ethernet/ibm/ibmvnic.c b/drivers/net/ethernet/ibm/ibmvnic.c index ed77191d19f44c..a775c69e4fd7f0 100644 --- a/drivers/net/ethernet/ibm/ibmvnic.c +++ b/drivers/net/ethernet/ibm/ibmvnic.c @@ -1731,7 +1731,6 @@ static netdev_tx_t ibmvnic_xmit(struct sk_buff *skb, struct net_device *netdev) tx_send_failed++; tx_dropped++; ret = NETDEV_TX_OK; - ibmvnic_tx_scrq_flush(adapter, tx_scrq); goto out; } @@ -1753,6 +1752,7 @@ static netdev_tx_t ibmvnic_xmit(struct sk_buff *skb, struct net_device *netdev) dev_kfree_skb_any(skb); tx_send_failed++; tx_dropped++; + ibmvnic_tx_scrq_flush(adapter, tx_scrq); ret = NETDEV_TX_OK; goto out; } From 161dcc02428858fe338b7493158ed6f5fc2a8f26 Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Wed, 21 Jul 2021 17:19:32 +0200 Subject: [PATCH 635/714] net: ixp46x: fix ptp build failure The rework of the ixp46x cpu detection left the network driver in a half broken state: drivers/net/ethernet/xscale/ptp_ixp46x.c: In function 'ptp_ixp_init': drivers/net/ethernet/xscale/ptp_ixp46x.c:290:51: error: 'IXP4XX_TIMESYNC_BASE_VIRT' undeclared (first use in this function) 290 | (struct ixp46x_ts_regs __iomem *) IXP4XX_TIMESYNC_BASE_VIRT; | ^~~~~~~~~~~~~~~~~~~~~~~~~ drivers/net/ethernet/xscale/ptp_ixp46x.c:290:51: note: each undeclared identifier is reported only once for each function it appears in drivers/net/ethernet/xscale/ptp_ixp46x.c: At top level: drivers/net/ethernet/xscale/ptp_ixp46x.c:323:1: error: data definition has no type or storage class [-Werror] 323 | module_init(ptp_ixp_init); I have patches to complete the transition for a future release, but for the moment, add the missing include statements to get it to build again. Fixes: 09aa9aabdcc4 ("soc: ixp4xx: move cpu detection to linux/soc/ixp4xx/cpu.h") Signed-off-by: Arnd Bergmann Signed-off-by: David S. Miller --- drivers/net/ethernet/xscale/ptp_ixp46x.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/net/ethernet/xscale/ptp_ixp46x.c b/drivers/net/ethernet/xscale/ptp_ixp46x.c index 99d4d9439d059d..a6fb88fd42f71e 100644 --- a/drivers/net/ethernet/xscale/ptp_ixp46x.c +++ b/drivers/net/ethernet/xscale/ptp_ixp46x.c @@ -14,6 +14,8 @@ #include #include #include +#include +#include #include "ixp46x_ts.h" From d6a206e60124a9759dd7f6dfb86b0e1d3b1df82e Mon Sep 17 00:00:00 2001 From: John Keeping Date: Wed, 21 Jul 2021 17:17:45 +0100 Subject: [PATCH 636/714] USB: serial: cp210x: add ID for CEL EM3588 USB ZigBee stick Add the USB serial device ID for the CEL ZigBee EM3588 radio stick. Signed-off-by: John Keeping Cc: stable@vger.kernel.org Signed-off-by: Johan Hovold --- drivers/usb/serial/cp210x.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/usb/serial/cp210x.c b/drivers/usb/serial/cp210x.c index af286240807e42..3c80bfbf3bec98 100644 --- a/drivers/usb/serial/cp210x.c +++ b/drivers/usb/serial/cp210x.c @@ -155,6 +155,7 @@ static const struct usb_device_id id_table[] = { { USB_DEVICE(0x10C4, 0x89A4) }, /* CESINEL FTBC Flexible Thyristor Bridge Controller */ { USB_DEVICE(0x10C4, 0x89FB) }, /* Qivicon ZigBee USB Radio Stick */ { USB_DEVICE(0x10C4, 0x8A2A) }, /* HubZ dual ZigBee and Z-Wave dongle */ + { USB_DEVICE(0x10C4, 0x8A5B) }, /* CEL EM3588 ZigBee USB Stick */ { USB_DEVICE(0x10C4, 0x8A5E) }, /* CEL EM3588 ZigBee USB Stick Long Range */ { USB_DEVICE(0x10C4, 0x8B34) }, /* Qivicon ZigBee USB Radio Stick */ { USB_DEVICE(0x10C4, 0xEA60) }, /* Silicon Labs factory default */ From ec3102dc6b36c692104c4a0546d4119de59a3bc1 Mon Sep 17 00:00:00 2001 From: Victor Lu Date: Thu, 24 Jun 2021 11:05:42 -0400 Subject: [PATCH 637/714] drm/amd/display: Fix comparison error in dcn21 DML [why] A comparison error made it possible to not iterate through all the specified prefetch modes. [how] Correct "<" to "<=" Reviewed-by: Dmytro Laktyushkin Reviewed-by: Yongqiang Sun Acked-by: Rodrigo Siqueira Signed-off-by: Victor Lu Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/display/dc/dml/dcn21/display_mode_vba_21.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/amd/display/dc/dml/dcn21/display_mode_vba_21.c b/drivers/gpu/drm/amd/display/dc/dml/dcn21/display_mode_vba_21.c index c26e742e813773..d25a7d38d21fee 100644 --- a/drivers/gpu/drm/amd/display/dc/dml/dcn21/display_mode_vba_21.c +++ b/drivers/gpu/drm/amd/display/dc/dml/dcn21/display_mode_vba_21.c @@ -4889,7 +4889,7 @@ void dml21_ModeSupportAndSystemConfigurationFull(struct display_mode_lib *mode_l } } while ((locals->PrefetchSupported[i][j] != true || locals->VRatioInPrefetchSupported[i][j] != true) && (mode_lib->vba.NextMaxVStartup != mode_lib->vba.MaxMaxVStartup[0][0] - || mode_lib->vba.NextPrefetchMode < mode_lib->vba.MaxPrefetchMode)); + || mode_lib->vba.NextPrefetchMode <= mode_lib->vba.MaxPrefetchMode)); if (locals->PrefetchSupported[i][j] == true && locals->VRatioInPrefetchSupported[i][j] == true) { mode_lib->vba.BandwidthAvailableForImmediateFlip = locals->ReturnBWPerState[i][0]; From 32f1d0cfc3444fb44ff1dba10d28e479690bdd3e Mon Sep 17 00:00:00 2001 From: Eric Yang Date: Wed, 23 Jun 2021 15:48:02 -0400 Subject: [PATCH 638/714] drm/amd/display: implement workaround for riommu related hang [Why] During S4/S5/reboot, sometimes riommu invalidation request arrive too early, DCN may be unable to respond to the invalidation request resulting in pstate hang. [How] VBIOS will force allow pstate for riommu invalidation and driver will clear it after powering down display pipes. Acked-by: Rodrigo Siqueira Signed-off-by: Eric Yang Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/display/dc/dce/dce_hwseq.h | 4 +++- .../gpu/drm/amd/display/dc/dcn31/dcn31_hwseq.c | 18 ++++++++++++++++++ .../gpu/drm/amd/display/dc/dcn31/dcn31_hwseq.h | 1 + .../gpu/drm/amd/display/dc/dcn31/dcn31_init.c | 2 +- .../drm/amd/display/dc/dcn31/dcn31_resource.c | 3 +++ .../amd/display/dc/inc/hw_sequencer_private.h | 1 + 6 files changed, 27 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/amd/display/dc/dce/dce_hwseq.h b/drivers/gpu/drm/amd/display/dc/dce/dce_hwseq.h index df6539e4c73014..0464a8f3db3c0b 100644 --- a/drivers/gpu/drm/amd/display/dc/dce/dce_hwseq.h +++ b/drivers/gpu/drm/amd/display/dc/dce/dce_hwseq.h @@ -636,6 +636,7 @@ struct dce_hwseq_registers { uint32_t ODM_MEM_PWR_CTRL3; uint32_t DMU_MEM_PWR_CNTL; uint32_t MMHUBBUB_MEM_PWR_CNTL; + uint32_t DCHUBBUB_ARB_HOSTVM_CNTL; }; /* set field name */ #define HWS_SF(blk_name, reg_name, field_name, post_fix)\ @@ -1110,7 +1111,8 @@ struct dce_hwseq_registers { type DOMAIN_POWER_FORCEON;\ type DOMAIN_POWER_GATE;\ type DOMAIN_PGFSM_PWR_STATUS;\ - type HPO_HDMISTREAMCLK_G_GATE_DIS; + type HPO_HDMISTREAMCLK_G_GATE_DIS;\ + type DISABLE_HOSTVM_FORCE_ALLOW_PSTATE; struct dce_hwseq_shift { HWSEQ_REG_FIELD_LIST(uint8_t) diff --git a/drivers/gpu/drm/amd/display/dc/dcn31/dcn31_hwseq.c b/drivers/gpu/drm/amd/display/dc/dcn31/dcn31_hwseq.c index 836864a5a5dc0b..6ac6faf0c533ba 100644 --- a/drivers/gpu/drm/amd/display/dc/dcn31/dcn31_hwseq.c +++ b/drivers/gpu/drm/amd/display/dc/dcn31/dcn31_hwseq.c @@ -47,6 +47,7 @@ #include "dce/dmub_outbox.h" #include "dc_link_dp.h" #include "inc/link_dpcd.h" +#include "dcn10/dcn10_hw_sequencer.h" #define DC_LOGGER_INIT(logger) @@ -594,3 +595,20 @@ bool dcn31_is_abm_supported(struct dc *dc, } return false; } + +static void apply_riommu_invalidation_wa(struct dc *dc) +{ + struct dce_hwseq *hws = dc->hwseq; + + if (!hws->wa.early_riommu_invalidation) + return; + + REG_UPDATE(DCHUBBUB_ARB_HOSTVM_CNTL, DISABLE_HOSTVM_FORCE_ALLOW_PSTATE, 0); +} + +void dcn31_init_pipes(struct dc *dc, struct dc_state *context) +{ + dcn10_init_pipes(dc, context); + apply_riommu_invalidation_wa(dc); + +} diff --git a/drivers/gpu/drm/amd/display/dc/dcn31/dcn31_hwseq.h b/drivers/gpu/drm/amd/display/dc/dcn31/dcn31_hwseq.h index ff72f0fdd5befe..40dfebe78fdd12 100644 --- a/drivers/gpu/drm/amd/display/dc/dcn31/dcn31_hwseq.h +++ b/drivers/gpu/drm/amd/display/dc/dcn31/dcn31_hwseq.h @@ -52,5 +52,6 @@ void dcn31_reset_hw_ctx_wrap( struct dc_state *context); bool dcn31_is_abm_supported(struct dc *dc, struct dc_state *context, struct dc_stream_state *stream); +void dcn31_init_pipes(struct dc *dc, struct dc_state *context); #endif /* __DC_HWSS_DCN31_H__ */ diff --git a/drivers/gpu/drm/amd/display/dc/dcn31/dcn31_init.c b/drivers/gpu/drm/amd/display/dc/dcn31/dcn31_init.c index e3048f8827d2b5..de74f62f96cdb4 100644 --- a/drivers/gpu/drm/amd/display/dc/dcn31/dcn31_init.c +++ b/drivers/gpu/drm/amd/display/dc/dcn31/dcn31_init.c @@ -104,7 +104,7 @@ static const struct hw_sequencer_funcs dcn31_funcs = { }; static const struct hwseq_private_funcs dcn31_private_funcs = { - .init_pipes = dcn10_init_pipes, + .init_pipes = dcn31_init_pipes, .update_plane_addr = dcn20_update_plane_addr, .plane_atomic_disconnect = dcn10_plane_atomic_disconnect, .update_mpcc = dcn20_update_mpcc, diff --git a/drivers/gpu/drm/amd/display/dc/dcn31/dcn31_resource.c b/drivers/gpu/drm/amd/display/dc/dcn31/dcn31_resource.c index c67bc9544f5d9d..3fe0aac4aaa677 100644 --- a/drivers/gpu/drm/amd/display/dc/dcn31/dcn31_resource.c +++ b/drivers/gpu/drm/amd/display/dc/dcn31/dcn31_resource.c @@ -741,6 +741,7 @@ static const struct dccg_mask dccg_mask = { #define HWSEQ_DCN31_REG_LIST()\ SR(DCHUBBUB_GLOBAL_TIMER_CNTL), \ + SR(DCHUBBUB_ARB_HOSTVM_CNTL), \ SR(DIO_MEM_PWR_CTRL), \ SR(ODM_MEM_PWR_CTRL3), \ SR(DMU_MEM_PWR_CNTL), \ @@ -801,6 +802,7 @@ static const struct dce_hwseq_registers hwseq_reg = { #define HWSEQ_DCN31_MASK_SH_LIST(mask_sh)\ HWSEQ_DCN_MASK_SH_LIST(mask_sh), \ HWS_SF(, DCHUBBUB_GLOBAL_TIMER_CNTL, DCHUBBUB_GLOBAL_TIMER_REFDIV, mask_sh), \ + HWS_SF(, DCHUBBUB_ARB_HOSTVM_CNTL, DISABLE_HOSTVM_FORCE_ALLOW_PSTATE, mask_sh), \ HWS_SF(, DOMAIN0_PG_CONFIG, DOMAIN_POWER_FORCEON, mask_sh), \ HWS_SF(, DOMAIN0_PG_CONFIG, DOMAIN_POWER_GATE, mask_sh), \ HWS_SF(, DOMAIN1_PG_CONFIG, DOMAIN_POWER_FORCEON, mask_sh), \ @@ -1299,6 +1301,7 @@ static struct dce_hwseq *dcn31_hwseq_create( hws->regs = &hwseq_reg; hws->shifts = &hwseq_shift; hws->masks = &hwseq_mask; + hws->wa.early_riommu_invalidation = true; } return hws; } diff --git a/drivers/gpu/drm/amd/display/dc/inc/hw_sequencer_private.h b/drivers/gpu/drm/amd/display/dc/inc/hw_sequencer_private.h index f7f7e4fff0c294..082549f7597845 100644 --- a/drivers/gpu/drm/amd/display/dc/inc/hw_sequencer_private.h +++ b/drivers/gpu/drm/amd/display/dc/inc/hw_sequencer_private.h @@ -41,6 +41,7 @@ struct dce_hwseq_wa { bool DEGVIDCN10_254; bool DEGVIDCN21; bool disallow_self_refresh_during_multi_plane_transition; + bool early_riommu_invalidation; }; struct hwseq_wa_state { From d7940911fc0754d99b208f0e3098762d39f403a0 Mon Sep 17 00:00:00 2001 From: Nicholas Kazlauskas Date: Wed, 7 Jul 2021 13:19:14 -0400 Subject: [PATCH 639/714] drm/amd/display: Fix max vstartup calculation for modes with borders [Why] Vertical and horizontal borders in timings are treated as increasing the active area - vblank and hblank actually shrink. Our input into DML does not include these borders so it incorrectly assumes it has more time than available for vstartup and tmdl calculations for some modes with borders. An example of such a timing would be 640x480@72Hz: h_total: 832 h_border_left: 8 h_addressable: 640 h_border_right: 8 h_front_porch: 16 h_sync_width: 40 v_total: 520 v_border_top: 8 v_addressable: 480 v_border_bottom: 8 v_front_porch: 1 v_sync_width: 3 pix_clk_100hz: 315000 [How] Include borders as part of destination vactive/hactive. This change DCN20+ so it has wide impact, but the destination vactive and hactive are only really used for vstartup calculation anyway. Most modes do not have vertical or horizontal borders. Reviewed-by: Dmytro Laktyushkin Acked-by: Rodrigo Siqueira Signed-off-by: Nicholas Kazlauskas Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/display/dc/dcn20/dcn20_resource.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/amd/display/dc/dcn20/dcn20_resource.c b/drivers/gpu/drm/amd/display/dc/dcn20/dcn20_resource.c index 1b05a37b674d0a..98d21fb374b14c 100644 --- a/drivers/gpu/drm/amd/display/dc/dcn20/dcn20_resource.c +++ b/drivers/gpu/drm/amd/display/dc/dcn20/dcn20_resource.c @@ -2093,8 +2093,10 @@ int dcn20_populate_dml_pipes_from_context( - timing->v_border_bottom; pipes[pipe_cnt].pipe.dest.htotal = timing->h_total; pipes[pipe_cnt].pipe.dest.vtotal = v_total; - pipes[pipe_cnt].pipe.dest.hactive = timing->h_addressable; - pipes[pipe_cnt].pipe.dest.vactive = timing->v_addressable; + pipes[pipe_cnt].pipe.dest.hactive = + timing->h_addressable + timing->h_border_left + timing->h_border_right; + pipes[pipe_cnt].pipe.dest.vactive = + timing->v_addressable + timing->v_border_top + timing->v_border_bottom; pipes[pipe_cnt].pipe.dest.interlaced = timing->flags.INTERLACE; pipes[pipe_cnt].pipe.dest.pixel_rate_mhz = timing->pix_clk_100hz/10000.0; if (timing->timing_3d_format == TIMING_3D_FORMAT_HW_FRAME_PACKING) From f30f55158728b4423cf1caf666dd2e2c1943dc19 Mon Sep 17 00:00:00 2001 From: Bindu Ramamurthy Date: Thu, 27 May 2021 10:11:32 -0400 Subject: [PATCH 640/714] drm/amd/display: Populate socclk entries for dcn3.02/3.03 [Why] Initialize socclk entries in bandwidth params for dcn302, dcn303. [How] Fetch the sockclk values from smu for the DPM levels and for the DPM levels where smu returns 0, previous level values are reported. Reviewed-by: Roman Li Acked-by: Rodrigo Siqueira Signed-off-by: Bindu Ramamurthy Signed-off-by: Alex Deucher --- .../gpu/drm/amd/display/dc/clk_mgr/dcn30/dcn30_clk_mgr.c | 4 ++++ drivers/gpu/drm/amd/display/dc/dcn302/dcn302_resource.c | 7 +++++-- drivers/gpu/drm/amd/display/dc/dcn303/dcn303_resource.c | 7 +++++-- 3 files changed, 14 insertions(+), 4 deletions(-) diff --git a/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn30/dcn30_clk_mgr.c b/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn30/dcn30_clk_mgr.c index 513676a6f52bc2..af7004b770aeb9 100644 --- a/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn30/dcn30_clk_mgr.c +++ b/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn30/dcn30_clk_mgr.c @@ -190,6 +190,10 @@ void dcn3_init_clocks(struct clk_mgr *clk_mgr_base) &clk_mgr_base->bw_params->clk_table.entries[0].dtbclk_mhz, &num_levels); + /* SOCCLK */ + dcn3_init_single_clock(clk_mgr, PPCLK_SOCCLK, + &clk_mgr_base->bw_params->clk_table.entries[0].socclk_mhz, + &num_levels); // DPREFCLK ??? /* DISPCLK */ diff --git a/drivers/gpu/drm/amd/display/dc/dcn302/dcn302_resource.c b/drivers/gpu/drm/amd/display/dc/dcn302/dcn302_resource.c index 16a75ba0ca824d..d65c097333a46e 100644 --- a/drivers/gpu/drm/amd/display/dc/dcn302/dcn302_resource.c +++ b/drivers/gpu/drm/amd/display/dc/dcn302/dcn302_resource.c @@ -1399,10 +1399,13 @@ void dcn302_update_bw_bounding_box(struct dc *dc, struct clk_bw_params *bw_param dcn3_02_soc.clock_limits[i].dppclk_mhz = max_dppclk_mhz; dcn3_02_soc.clock_limits[i].phyclk_mhz = max_phyclk_mhz; dcn3_02_soc.clock_limits[i].dtbclk_mhz = dcn3_02_soc.clock_limits[0].dtbclk_mhz; + if (!bw_params->clk_table.entries[i].socclk_mhz && i > 0) + dcn3_02_soc.clock_limits[i].socclk_mhz = dcn3_02_soc.clock_limits[i-1].socclk_mhz; + else + dcn3_02_soc.clock_limits[i].socclk_mhz = bw_params->clk_table.entries[i].socclk_mhz; /* These clocks cannot come from bw_params, always fill from dcn3_02_soc[1] */ - /* FCLK, PHYCLK_D18, SOCCLK, DSCCLK */ + /* FCLK, PHYCLK_D18, DSCCLK */ dcn3_02_soc.clock_limits[i].phyclk_d18_mhz = dcn3_02_soc.clock_limits[0].phyclk_d18_mhz; - dcn3_02_soc.clock_limits[i].socclk_mhz = dcn3_02_soc.clock_limits[0].socclk_mhz; dcn3_02_soc.clock_limits[i].dscclk_mhz = dcn3_02_soc.clock_limits[0].dscclk_mhz; } /* re-init DML with updated bb */ diff --git a/drivers/gpu/drm/amd/display/dc/dcn303/dcn303_resource.c b/drivers/gpu/drm/amd/display/dc/dcn303/dcn303_resource.c index 34b89464ae0224..f8b84722a389c9 100644 --- a/drivers/gpu/drm/amd/display/dc/dcn303/dcn303_resource.c +++ b/drivers/gpu/drm/amd/display/dc/dcn303/dcn303_resource.c @@ -1327,10 +1327,13 @@ void dcn303_update_bw_bounding_box(struct dc *dc, struct clk_bw_params *bw_param dcn3_03_soc.clock_limits[i].dppclk_mhz = max_dppclk_mhz; dcn3_03_soc.clock_limits[i].phyclk_mhz = max_phyclk_mhz; dcn3_03_soc.clock_limits[i].dtbclk_mhz = dcn3_03_soc.clock_limits[0].dtbclk_mhz; + if (!bw_params->clk_table.entries[i].socclk_mhz && i > 0) + dcn3_03_soc.clock_limits[i].socclk_mhz = dcn3_03_soc.clock_limits[i-1].socclk_mhz; + else + dcn3_03_soc.clock_limits[i].socclk_mhz = bw_params->clk_table.entries[i].socclk_mhz; /* These clocks cannot come from bw_params, always fill from dcn3_03_soc[1] */ - /* FCLK, PHYCLK_D18, SOCCLK, DSCCLK */ + /* FCLK, PHYCLK_D18, DSCCLK */ dcn3_03_soc.clock_limits[i].phyclk_d18_mhz = dcn3_03_soc.clock_limits[0].phyclk_d18_mhz; - dcn3_03_soc.clock_limits[i].socclk_mhz = dcn3_03_soc.clock_limits[0].socclk_mhz; dcn3_03_soc.clock_limits[i].dscclk_mhz = dcn3_03_soc.clock_limits[0].dscclk_mhz; } /* re-init DML with updated bb */ From b0364fa4fc045685f827ea34c0149c953d0240bd Mon Sep 17 00:00:00 2001 From: Nicholas Kazlauskas Date: Wed, 7 Jul 2021 16:38:57 -0400 Subject: [PATCH 641/714] drm/amd/display: Query VCO frequency from register for DCN3.1 [Why] Hardcoding the VCO frequency isn't correct since we don't own or control the value. In the case where the hardcode is also missing we can't lightup display. [How] Query from the CLK register instead. Update the DFS frequency to be able to compute the VCO frequency. Reviewed-by: Eric Yang Acked-by: Rodrigo Siqueira Signed-off-by: Nicholas Kazlauskas Signed-off-by: Alex Deucher --- .../display/dc/clk_mgr/dcn31/dcn31_clk_mgr.c | 43 ++++++++++++++- .../display/dc/clk_mgr/dcn31/dcn31_clk_mgr.h | 54 ------------------- 2 files changed, 42 insertions(+), 55 deletions(-) diff --git a/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn31/dcn31_clk_mgr.c b/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn31/dcn31_clk_mgr.c index 7b7d884d58be09..d15c628a2ab0a1 100644 --- a/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn31/dcn31_clk_mgr.c +++ b/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn31/dcn31_clk_mgr.c @@ -48,6 +48,21 @@ #include "dc_dmub_srv.h" +#include "yellow_carp_offset.h" + +#define regCLK1_CLK_PLL_REQ 0x0237 +#define regCLK1_CLK_PLL_REQ_BASE_IDX 0 + +#define CLK1_CLK_PLL_REQ__FbMult_int__SHIFT 0x0 +#define CLK1_CLK_PLL_REQ__PllSpineDiv__SHIFT 0xc +#define CLK1_CLK_PLL_REQ__FbMult_frac__SHIFT 0x10 +#define CLK1_CLK_PLL_REQ__FbMult_int_MASK 0x000001FFL +#define CLK1_CLK_PLL_REQ__PllSpineDiv_MASK 0x0000F000L +#define CLK1_CLK_PLL_REQ__FbMult_frac_MASK 0xFFFF0000L + +#define REG(reg_name) \ + (CLK_BASE.instance[0].segment[reg ## reg_name ## _BASE_IDX] + reg ## reg_name) + #define TO_CLK_MGR_DCN31(clk_mgr)\ container_of(clk_mgr, struct clk_mgr_dcn31, base) @@ -229,7 +244,32 @@ static void dcn31_update_clocks(struct clk_mgr *clk_mgr_base, static int get_vco_frequency_from_reg(struct clk_mgr_internal *clk_mgr) { - return 0; + /* get FbMult value */ + struct fixed31_32 pll_req; + unsigned int fbmult_frac_val = 0; + unsigned int fbmult_int_val = 0; + + /* + * Register value of fbmult is in 8.16 format, we are converting to 31.32 + * to leverage the fix point operations available in driver + */ + + REG_GET(CLK1_CLK_PLL_REQ, FbMult_frac, &fbmult_frac_val); /* 16 bit fractional part*/ + REG_GET(CLK1_CLK_PLL_REQ, FbMult_int, &fbmult_int_val); /* 8 bit integer part */ + + pll_req = dc_fixpt_from_int(fbmult_int_val); + + /* + * since fractional part is only 16 bit in register definition but is 32 bit + * in our fix point definiton, need to shift left by 16 to obtain correct value + */ + pll_req.value |= fbmult_frac_val << 16; + + /* multiply by REFCLK period */ + pll_req = dc_fixpt_mul_int(pll_req, clk_mgr->dfs_ref_freq_khz); + + /* integer part is now VCO frequency in kHz */ + return dc_fixpt_floor(pll_req); } static void dcn31_enable_pme_wa(struct clk_mgr *clk_mgr_base) @@ -592,6 +632,7 @@ void dcn31_clk_mgr_construct( clk_mgr->base.dprefclk_ss_percentage = 0; clk_mgr->base.dprefclk_ss_divider = 1000; clk_mgr->base.ss_on_dprefclk = false; + clk_mgr->base.dfs_ref_freq_khz = 48000; clk_mgr->smu_wm_set.wm_set = (struct dcn31_watermarks *)dm_helpers_allocate_gpu_mem( clk_mgr->base.base.ctx, diff --git a/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn31/dcn31_clk_mgr.h b/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn31/dcn31_clk_mgr.h index cc21cf75eafd43..f8f100535526df 100644 --- a/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn31/dcn31_clk_mgr.h +++ b/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn31/dcn31_clk_mgr.h @@ -27,60 +27,6 @@ #define __DCN31_CLK_MGR_H__ #include "clk_mgr_internal.h" -//CLK1_CLK_PLL_REQ -#ifndef CLK11_CLK1_CLK_PLL_REQ__FbMult_int__SHIFT -#define CLK11_CLK1_CLK_PLL_REQ__FbMult_int__SHIFT 0x0 -#define CLK11_CLK1_CLK_PLL_REQ__PllSpineDiv__SHIFT 0xc -#define CLK11_CLK1_CLK_PLL_REQ__FbMult_frac__SHIFT 0x10 -#define CLK11_CLK1_CLK_PLL_REQ__FbMult_int_MASK 0x000001FFL -#define CLK11_CLK1_CLK_PLL_REQ__PllSpineDiv_MASK 0x0000F000L -#define CLK11_CLK1_CLK_PLL_REQ__FbMult_frac_MASK 0xFFFF0000L -//CLK1_CLK0_DFS_CNTL -#define CLK11_CLK1_CLK0_DFS_CNTL__CLK0_DIVIDER__SHIFT 0x0 -#define CLK11_CLK1_CLK0_DFS_CNTL__CLK0_DIVIDER_MASK 0x0000007FL -/*DPREF clock related*/ -#define CLK0_CLK3_DFS_CNTL__CLK3_DIVIDER__SHIFT 0x0 -#define CLK0_CLK3_DFS_CNTL__CLK3_DIVIDER_MASK 0x0000007FL -#define CLK1_CLK3_DFS_CNTL__CLK3_DIVIDER__SHIFT 0x0 -#define CLK1_CLK3_DFS_CNTL__CLK3_DIVIDER_MASK 0x0000007FL -#define CLK2_CLK3_DFS_CNTL__CLK3_DIVIDER__SHIFT 0x0 -#define CLK2_CLK3_DFS_CNTL__CLK3_DIVIDER_MASK 0x0000007FL -#define CLK3_CLK3_DFS_CNTL__CLK3_DIVIDER__SHIFT 0x0 -#define CLK3_CLK3_DFS_CNTL__CLK3_DIVIDER_MASK 0x0000007FL - -//CLK3_0_CLK3_CLK_PLL_REQ -#define CLK3_0_CLK3_CLK_PLL_REQ__FbMult_int__SHIFT 0x0 -#define CLK3_0_CLK3_CLK_PLL_REQ__PllSpineDiv__SHIFT 0xc -#define CLK3_0_CLK3_CLK_PLL_REQ__FbMult_frac__SHIFT 0x10 -#define CLK3_0_CLK3_CLK_PLL_REQ__FbMult_int_MASK 0x000001FFL -#define CLK3_0_CLK3_CLK_PLL_REQ__PllSpineDiv_MASK 0x0000F000L -#define CLK3_0_CLK3_CLK_PLL_REQ__FbMult_frac_MASK 0xFFFF0000L - -#define mmCLK0_CLK3_DFS_CNTL 0x16C60 -#define mmCLK00_CLK0_CLK3_DFS_CNTL 0x16C60 -#define mmCLK01_CLK0_CLK3_DFS_CNTL 0x16E60 -#define mmCLK02_CLK0_CLK3_DFS_CNTL 0x17060 -#define mmCLK03_CLK0_CLK3_DFS_CNTL 0x17260 - -#define mmCLK0_CLK_PLL_REQ 0x16C10 -#define mmCLK00_CLK0_CLK_PLL_REQ 0x16C10 -#define mmCLK01_CLK0_CLK_PLL_REQ 0x16E10 -#define mmCLK02_CLK0_CLK_PLL_REQ 0x17010 -#define mmCLK03_CLK0_CLK_PLL_REQ 0x17210 - -#define mmCLK1_CLK_PLL_REQ 0x1B00D -#define mmCLK10_CLK1_CLK_PLL_REQ 0x1B00D -#define mmCLK11_CLK1_CLK_PLL_REQ 0x1B20D -#define mmCLK12_CLK1_CLK_PLL_REQ 0x1B40D -#define mmCLK13_CLK1_CLK_PLL_REQ 0x1B60D - -#define mmCLK2_CLK_PLL_REQ 0x17E0D - -/*AMCLK*/ -#define mmCLK11_CLK1_CLK0_DFS_CNTL 0x1B23F -#define mmCLK11_CLK1_CLK_PLL_REQ 0x1B20D -#endif - struct dcn31_watermarks; struct dcn31_smu_watermark_set { From a8e380fd8d71493623c94511f75e81786cffa223 Mon Sep 17 00:00:00 2001 From: Nicholas Kazlauskas Date: Thu, 8 Jul 2021 12:59:59 -0400 Subject: [PATCH 642/714] drm/amd/display: Update bounding box for DCN3.1 [Why & How] We're missing a default value for dram_channel_width_bytes in the DCN3.1 SOC bounding box and we don't currently have the interface in place to query the actual value from VBIOS. Put in a hardcoded default until we have the interface in place. Reviewed-by: Eric Yang Acked-by: Rodrigo Siqueira Signed-off-by: Nicholas Kazlauskas Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/display/dc/dcn31/dcn31_resource.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/gpu/drm/amd/display/dc/dcn31/dcn31_resource.c b/drivers/gpu/drm/amd/display/dc/dcn31/dcn31_resource.c index 3fe0aac4aaa677..38c010afade150 100644 --- a/drivers/gpu/drm/amd/display/dc/dcn31/dcn31_resource.c +++ b/drivers/gpu/drm/amd/display/dc/dcn31/dcn31_resource.c @@ -220,6 +220,7 @@ struct _vcs_dpi_soc_bounding_box_st dcn3_1_soc = { .sr_exit_z8_time_us = 402.0, .sr_enter_plus_exit_z8_time_us = 520.0, .writeback_latency_us = 12.0, + .dram_channel_width_bytes = 4, .round_trip_ping_latency_dcfclk_cycles = 106, .urgent_latency_pixel_data_only_us = 4.0, .urgent_latency_pixel_mixed_with_vm_data_us = 4.0, From 2d73eabe2984a435737498ab39bb1500a9ffe9a9 Mon Sep 17 00:00:00 2001 From: Camille Cho Date: Thu, 8 Jul 2021 18:28:37 +0800 Subject: [PATCH 643/714] drm/amd/display: Only set default brightness for OLED [Why] We used to unconditionally set backlight path as AUX for panels capable of backlight adjustment via DPCD in set default brightness. [How] This should be limited to OLED panel only since we control backlight via PWM path for SDR mode in LCD HDR panel. Reviewed-by: Krunoslav Kovac Acked-by: Rodrigo Siqueira Signed-off-by: Camille Cho Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/display/dc/core/dc_link_dp.c | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/drivers/gpu/drm/amd/display/dc/core/dc_link_dp.c b/drivers/gpu/drm/amd/display/dc/core/dc_link_dp.c index 6da226bf11d596..12066f5a53fc7c 100644 --- a/drivers/gpu/drm/amd/display/dc/core/dc_link_dp.c +++ b/drivers/gpu/drm/amd/display/dc/core/dc_link_dp.c @@ -4914,9 +4914,7 @@ bool dc_link_set_default_brightness_aux(struct dc_link *link) { uint32_t default_backlight; - if (link && - (link->dpcd_sink_ext_caps.bits.hdr_aux_backlight_control == 1 || - link->dpcd_sink_ext_caps.bits.sdr_aux_backlight_control == 1)) { + if (link && link->dpcd_sink_ext_caps.bits.oled == 1) { if (!dc_link_read_default_bl_aux(link, &default_backlight)) default_backlight = 150000; // if < 5 nits or > 5000, it might be wrong readback From 6580b28e0c734cea2f829e97827005c311293cb7 Mon Sep 17 00:00:00 2001 From: Mikita Lipski Date: Mon, 14 Jun 2021 20:21:42 -0400 Subject: [PATCH 644/714] drm/amd/display: Remove MALL function from DCN3.1 [why] DCN31 doesn't have MALL in DMUB so to avoid sending unknown commands to DMUB just remove the function pointer. [how] Remove apply_idle_power_optimizations from function pointers structure for DCN31 Reviewed-by: Nicholas Kazlauskas Acked-by: Rodrigo Siqueira Signed-off-by: Mikita Lipski Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/display/dc/dcn31/dcn31_init.c | 1 - 1 file changed, 1 deletion(-) diff --git a/drivers/gpu/drm/amd/display/dc/dcn31/dcn31_init.c b/drivers/gpu/drm/amd/display/dc/dcn31/dcn31_init.c index de74f62f96cdb4..aaf2dbd095fe1d 100644 --- a/drivers/gpu/drm/amd/display/dc/dcn31/dcn31_init.c +++ b/drivers/gpu/drm/amd/display/dc/dcn31/dcn31_init.c @@ -93,7 +93,6 @@ static const struct hw_sequencer_funcs dcn31_funcs = { .set_flip_control_gsl = dcn20_set_flip_control_gsl, .get_vupdate_offset_from_vsync = dcn10_get_vupdate_offset_from_vsync, .calc_vupdate_position = dcn10_calc_vupdate_position, - .apply_idle_power_optimizations = dcn30_apply_idle_power_optimizations, .set_backlight_level = dcn21_set_backlight_level, .set_abm_immediate_disable = dcn21_set_abm_immediate_disable, .set_pipe = dcn21_set_pipe, From 0f984c942cd1703b26dc01351dc47b0c93bc32f3 Mon Sep 17 00:00:00 2001 From: Nevenko Stupar Date: Fri, 9 Jul 2021 13:05:11 -0400 Subject: [PATCH 645/714] drm/amd/display: Line Buffer changes DCN 3x increased Line buffer size for DCHUB latency hiding, from 4 lines of 4K resolution lines to 5 lines of 4K resolution lines. All Line Buffer can be used as extended memory for P State change latency hiding. The maximum number of lines is increased to 32 lines. Finally, LB_MEMORY_CONFIG_1 (LB memory piece 1) and LB_MEMORY _CONFIG_2 (LB memory piece 2) are not affected, no change in size, only 3 pieces is affected, i.e., when all 3 pieces are used in both LB_MEMORY_CONFIG_0 and LB_MEMORY_CONFIG_3 (for 4:2:0) modes. Reviewed-by: Jun Lei Acked-by: Rodrigo Siqueira Signed-off-by: Nevenko Stupar Signed-off-by: Alex Deucher --- .../drm/amd/display/dc/dcn10/dcn10_dpp_dscl.c | 7 ++++++- drivers/gpu/drm/amd/display/dc/dcn30/dcn30_dpp.c | 16 ---------------- drivers/gpu/drm/amd/display/dc/dcn30/dcn30_dpp.h | 3 +-- .../gpu/drm/amd/display/dc/inc/hw/transform.h | 3 +++ 4 files changed, 10 insertions(+), 19 deletions(-) diff --git a/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_dpp_dscl.c b/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_dpp_dscl.c index 673b93f4fea519..cb9767ddf93d38 100644 --- a/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_dpp_dscl.c +++ b/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_dpp_dscl.c @@ -217,6 +217,8 @@ static void dpp1_dscl_set_lb( const struct line_buffer_params *lb_params, enum lb_memory_config mem_size_config) { + uint32_t max_partitions = 63; /* Currently hardcoded on all ASICs before DCN 3.2 */ + /* LB */ if (dpp->base.caps->dscl_data_proc_format == DSCL_DATA_PRCESSING_FIXED_FORMAT) { /* DSCL caps: pixel data processed in fixed format */ @@ -239,9 +241,12 @@ static void dpp1_dscl_set_lb( LB_DATA_FORMAT__ALPHA_EN, lb_params->alpha_en); /* Alpha enable */ } + if (dpp->base.caps->max_lb_partitions == 31) + max_partitions = 31; + REG_SET_2(LB_MEMORY_CTRL, 0, MEMORY_CONFIG, mem_size_config, - LB_MAX_PARTITIONS, 63); + LB_MAX_PARTITIONS, max_partitions); } static const uint16_t *dpp1_dscl_get_filter_coeffs_64p(int taps, struct fixed31_32 ratio) diff --git a/drivers/gpu/drm/amd/display/dc/dcn30/dcn30_dpp.c b/drivers/gpu/drm/amd/display/dc/dcn30/dcn30_dpp.c index 2140b75540cfe1..23a52d47e61c4b 100644 --- a/drivers/gpu/drm/amd/display/dc/dcn30/dcn30_dpp.c +++ b/drivers/gpu/drm/amd/display/dc/dcn30/dcn30_dpp.c @@ -383,13 +383,6 @@ bool dpp3_get_optimal_number_of_taps( int min_taps_y, min_taps_c; enum lb_memory_config lb_config; - /* Some ASICs does not support FP16 scaling, so we reject modes require this*/ - if (scl_data->viewport.width != scl_data->h_active && - scl_data->viewport.height != scl_data->v_active && - dpp->caps->dscl_data_proc_format == DSCL_DATA_PRCESSING_FIXED_FORMAT && - scl_data->format == PIXEL_FORMAT_FP16) - return false; - if (scl_data->viewport.width > scl_data->h_active && dpp->ctx->dc->debug.max_downscale_src_width != 0 && scl_data->viewport.width > dpp->ctx->dc->debug.max_downscale_src_width) @@ -1440,15 +1433,6 @@ bool dpp3_construct( dpp->tf_shift = tf_shift; dpp->tf_mask = tf_mask; - dpp->lb_pixel_depth_supported = - LB_PIXEL_DEPTH_18BPP | - LB_PIXEL_DEPTH_24BPP | - LB_PIXEL_DEPTH_30BPP | - LB_PIXEL_DEPTH_36BPP; - - dpp->lb_bits_per_entry = LB_BITS_PER_ENTRY; - dpp->lb_memory_size = LB_TOTAL_NUMBER_OF_ENTRIES; /*0x1404*/ - return true; } diff --git a/drivers/gpu/drm/amd/display/dc/dcn30/dcn30_dpp.h b/drivers/gpu/drm/amd/display/dc/dcn30/dcn30_dpp.h index 3fa86cd090a087..ac644ae6b9f26a 100644 --- a/drivers/gpu/drm/amd/display/dc/dcn30/dcn30_dpp.h +++ b/drivers/gpu/drm/amd/display/dc/dcn30/dcn30_dpp.h @@ -154,6 +154,7 @@ SRI(COLOR_KEYER_BLUE, CNVC_CFG, id), \ SRI(CURSOR_CONTROL, CURSOR0_, id),\ SRI(OBUF_MEM_PWR_CTRL, DSCL, id),\ + SRI(DSCL_MEM_PWR_STATUS, DSCL, id), \ SRI(DSCL_MEM_PWR_CTRL, DSCL, id) #define DPP_REG_LIST_DCN30(id)\ @@ -163,8 +164,6 @@ SRI(CM_SHAPER_LUT_DATA, CM, id),\ SRI(CM_MEM_PWR_CTRL2, CM, id), \ SRI(CM_MEM_PWR_STATUS2, CM, id), \ - SRI(DSCL_MEM_PWR_STATUS, DSCL, id), \ - SRI(DSCL_MEM_PWR_CTRL, DSCL, id), \ SRI(CM_BLNDGAM_RAMA_START_SLOPE_CNTL_B, CM, id),\ SRI(CM_BLNDGAM_RAMA_START_SLOPE_CNTL_G, CM, id),\ SRI(CM_BLNDGAM_RAMA_START_SLOPE_CNTL_R, CM, id),\ diff --git a/drivers/gpu/drm/amd/display/dc/inc/hw/transform.h b/drivers/gpu/drm/amd/display/dc/inc/hw/transform.h index 2a0db2b03047e3..9ac9d5e8df8b91 100644 --- a/drivers/gpu/drm/amd/display/dc/inc/hw/transform.h +++ b/drivers/gpu/drm/amd/display/dc/inc/hw/transform.h @@ -289,6 +289,9 @@ struct dpp_caps { /* DSCL processing pixel data in fixed or float format */ enum dscl_data_processing_format dscl_data_proc_format; + /* max LB partitions */ + unsigned int max_lb_partitions; + /* Calculates the number of partitions in the line buffer. * The implementation of this function is overloaded for * different versions of DSCL LB. From ce350c6e786ef4bc3a4ddb58f8bdf2e48219fdaa Mon Sep 17 00:00:00 2001 From: Bindu Ramamurthy Date: Fri, 9 Jul 2021 10:35:33 -0400 Subject: [PATCH 646/714] drm/amd/display: Populate dtbclk entries for dcn3.02/3.03 [Why] Populate dtbclk values from bwparams for dcn302, dcn303. [How] dtbclk values are fetched from bandwidthparams for all DPM levels and for DPM levels where smu returns 0, previous level values are reported. Reviewed-by: Roman Li Acked-by: Rodrigo Siqueira Signed-off-by: Bindu Ramamurthy Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/display/dc/dcn302/dcn302_resource.c | 6 +++++- drivers/gpu/drm/amd/display/dc/dcn303/dcn303_resource.c | 6 +++++- 2 files changed, 10 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/amd/display/dc/dcn302/dcn302_resource.c b/drivers/gpu/drm/amd/display/dc/dcn302/dcn302_resource.c index d65c097333a46e..7d3ff5d4440235 100644 --- a/drivers/gpu/drm/amd/display/dc/dcn302/dcn302_resource.c +++ b/drivers/gpu/drm/amd/display/dc/dcn302/dcn302_resource.c @@ -1398,7 +1398,11 @@ void dcn302_update_bw_bounding_box(struct dc *dc, struct clk_bw_params *bw_param dcn3_02_soc.clock_limits[i].dispclk_mhz = max_dispclk_mhz; dcn3_02_soc.clock_limits[i].dppclk_mhz = max_dppclk_mhz; dcn3_02_soc.clock_limits[i].phyclk_mhz = max_phyclk_mhz; - dcn3_02_soc.clock_limits[i].dtbclk_mhz = dcn3_02_soc.clock_limits[0].dtbclk_mhz; + /* Populate from bw_params for DTBCLK, SOCCLK */ + if (!bw_params->clk_table.entries[i].dtbclk_mhz && i > 0) + dcn3_02_soc.clock_limits[i].dtbclk_mhz = dcn3_02_soc.clock_limits[i-1].dtbclk_mhz; + else + dcn3_02_soc.clock_limits[i].dtbclk_mhz = bw_params->clk_table.entries[i].dtbclk_mhz; if (!bw_params->clk_table.entries[i].socclk_mhz && i > 0) dcn3_02_soc.clock_limits[i].socclk_mhz = dcn3_02_soc.clock_limits[i-1].socclk_mhz; else diff --git a/drivers/gpu/drm/amd/display/dc/dcn303/dcn303_resource.c b/drivers/gpu/drm/amd/display/dc/dcn303/dcn303_resource.c index f8b84722a389c9..833ab13fa83401 100644 --- a/drivers/gpu/drm/amd/display/dc/dcn303/dcn303_resource.c +++ b/drivers/gpu/drm/amd/display/dc/dcn303/dcn303_resource.c @@ -1326,7 +1326,11 @@ void dcn303_update_bw_bounding_box(struct dc *dc, struct clk_bw_params *bw_param dcn3_03_soc.clock_limits[i].dispclk_mhz = max_dispclk_mhz; dcn3_03_soc.clock_limits[i].dppclk_mhz = max_dppclk_mhz; dcn3_03_soc.clock_limits[i].phyclk_mhz = max_phyclk_mhz; - dcn3_03_soc.clock_limits[i].dtbclk_mhz = dcn3_03_soc.clock_limits[0].dtbclk_mhz; + /* Populate from bw_params for DTBCLK, SOCCLK */ + if (!bw_params->clk_table.entries[i].dtbclk_mhz && i > 0) + dcn3_03_soc.clock_limits[i].dtbclk_mhz = dcn3_03_soc.clock_limits[i-1].dtbclk_mhz; + else + dcn3_03_soc.clock_limits[i].dtbclk_mhz = bw_params->clk_table.entries[i].dtbclk_mhz; if (!bw_params->clk_table.entries[i].socclk_mhz && i > 0) dcn3_03_soc.clock_limits[i].socclk_mhz = dcn3_03_soc.clock_limits[i-1].socclk_mhz; else From 93b6bd307a54ea62bfcf89748c80bd25b7bd3205 Mon Sep 17 00:00:00 2001 From: Eric Yang Date: Wed, 30 Jun 2021 18:22:51 -0400 Subject: [PATCH 647/714] drm/amd/display: change zstate allow msg condition [Why] PMFW message which previously thought to only control Z9 controls both Z9 and Z10. Also HW design team requested that Z9 must only be supported on eDP due to content protection interop. [How] Change zstate support condition to match updated policy Reviewed-by: Nicholas Kazlauskas Acked-by: Rodrigo Siqueira Signed-off-by: Eric Yang Signed-off-by: Alex Deucher --- .../display/dc/clk_mgr/dcn31/dcn31_clk_mgr.c | 16 +++---- drivers/gpu/drm/amd/display/dc/dc.h | 10 ++--- .../drm/amd/display/dc/dcn20/dcn20_resource.c | 44 ++++++++++++++----- 3 files changed, 45 insertions(+), 25 deletions(-) diff --git a/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn31/dcn31_clk_mgr.c b/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn31/dcn31_clk_mgr.c index d15c628a2ab0a1..4a4894e9d9c9a1 100644 --- a/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn31/dcn31_clk_mgr.c +++ b/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn31/dcn31_clk_mgr.c @@ -139,10 +139,10 @@ static void dcn31_update_clocks(struct clk_mgr *clk_mgr_base, * also if safe to lower is false, we just go in the higher state */ if (safe_to_lower) { - if (new_clocks->z9_support == DCN_Z9_SUPPORT_ALLOW && - new_clocks->z9_support != clk_mgr_base->clks.z9_support) { + if (new_clocks->zstate_support == DCN_ZSTATE_SUPPORT_ALLOW && + new_clocks->zstate_support != clk_mgr_base->clks.zstate_support) { dcn31_smu_set_Z9_support(clk_mgr, true); - clk_mgr_base->clks.z9_support = new_clocks->z9_support; + clk_mgr_base->clks.zstate_support = new_clocks->zstate_support; } if (clk_mgr_base->clks.dtbclk_en && !new_clocks->dtbclk_en) { @@ -163,10 +163,10 @@ static void dcn31_update_clocks(struct clk_mgr *clk_mgr_base, } } } else { - if (new_clocks->z9_support == DCN_Z9_SUPPORT_DISALLOW && - new_clocks->z9_support != clk_mgr_base->clks.z9_support) { + if (new_clocks->zstate_support == DCN_ZSTATE_SUPPORT_DISALLOW && + new_clocks->zstate_support != clk_mgr_base->clks.zstate_support) { dcn31_smu_set_Z9_support(clk_mgr, false); - clk_mgr_base->clks.z9_support = new_clocks->z9_support; + clk_mgr_base->clks.zstate_support = new_clocks->zstate_support; } if (!clk_mgr_base->clks.dtbclk_en && new_clocks->dtbclk_en) { @@ -286,7 +286,7 @@ static void dcn31_init_clocks(struct clk_mgr *clk_mgr) clk_mgr->clks.p_state_change_support = true; clk_mgr->clks.prev_p_state_change_support = true; clk_mgr->clks.pwr_state = DCN_PWR_STATE_UNKNOWN; - clk_mgr->clks.z9_support = DCN_Z9_SUPPORT_UNKNOWN; + clk_mgr->clks.zstate_support = DCN_ZSTATE_SUPPORT_UNKNOWN; } static bool dcn31_are_clock_states_equal(struct dc_clocks *a, @@ -300,7 +300,7 @@ static bool dcn31_are_clock_states_equal(struct dc_clocks *a, return false; else if (a->dcfclk_deep_sleep_khz != b->dcfclk_deep_sleep_khz) return false; - else if (a->z9_support != b->z9_support) + else if (a->zstate_support != b->zstate_support) return false; else if (a->dtbclk_en != b->dtbclk_en) return false; diff --git a/drivers/gpu/drm/amd/display/dc/dc.h b/drivers/gpu/drm/amd/display/dc/dc.h index 45640f1c26c4bf..8dcea8ff5c5ad7 100644 --- a/drivers/gpu/drm/amd/display/dc/dc.h +++ b/drivers/gpu/drm/amd/display/dc/dc.h @@ -354,10 +354,10 @@ enum dcn_pwr_state { }; #if defined(CONFIG_DRM_AMD_DC_DCN) -enum dcn_z9_support_state { - DCN_Z9_SUPPORT_UNKNOWN, - DCN_Z9_SUPPORT_ALLOW, - DCN_Z9_SUPPORT_DISALLOW, +enum dcn_zstate_support_state { + DCN_ZSTATE_SUPPORT_UNKNOWN, + DCN_ZSTATE_SUPPORT_ALLOW, + DCN_ZSTATE_SUPPORT_DISALLOW, }; #endif /* @@ -378,7 +378,7 @@ struct dc_clocks { int dramclk_khz; bool p_state_change_support; #if defined(CONFIG_DRM_AMD_DC_DCN) - enum dcn_z9_support_state z9_support; + enum dcn_zstate_support_state zstate_support; bool dtbclk_en; #endif enum dcn_pwr_state pwr_state; diff --git a/drivers/gpu/drm/amd/display/dc/dcn20/dcn20_resource.c b/drivers/gpu/drm/amd/display/dc/dcn20/dcn20_resource.c index 98d21fb374b14c..b173fa3653b555 100644 --- a/drivers/gpu/drm/amd/display/dc/dcn20/dcn20_resource.c +++ b/drivers/gpu/drm/amd/display/dc/dcn20/dcn20_resource.c @@ -3081,6 +3081,37 @@ static bool is_dtbclk_required(struct dc *dc, struct dc_state *context) return false; } +static enum dcn_zstate_support_state decide_zstate_support(struct dc *dc, struct dc_state *context) +{ + int plane_count; + int i; + + plane_count = 0; + for (i = 0; i < dc->res_pool->pipe_count; i++) { + if (context->res_ctx.pipe_ctx[i].plane_state) + plane_count++; + } + + /* + * Zstate is allowed in following scenarios: + * 1. Single eDP with PSR enabled + * 2. 0 planes (No memory requests) + * 3. Single eDP without PSR but > 5ms stutter period + */ + if (plane_count == 0) + return DCN_ZSTATE_SUPPORT_ALLOW; + else if (context->stream_count == 1 && context->streams[0]->signal == SIGNAL_TYPE_EDP) { + struct dc_link *link = context->streams[0]->sink->link; + + if ((link->link_index == 0 && link->psr_settings.psr_feature_enabled) + || context->bw_ctx.dml.vba.StutterPeriod > 5000.0) + return DCN_ZSTATE_SUPPORT_ALLOW; + else + return DCN_ZSTATE_SUPPORT_DISALLOW; + } else + return DCN_ZSTATE_SUPPORT_DISALLOW; +} + void dcn20_calculate_dlg_params( struct dc *dc, struct dc_state *context, display_e2e_pipe_params_st *pipes, @@ -3088,7 +3119,6 @@ void dcn20_calculate_dlg_params( int vlevel) { int i, pipe_idx; - int plane_count; /* Writeback MCIF_WB arbitration parameters */ dc->res_pool->funcs->set_mcif_arb_params(dc, context, pipes, pipe_cnt); @@ -3104,17 +3134,7 @@ void dcn20_calculate_dlg_params( != dm_dram_clock_change_unsupported; context->bw_ctx.bw.dcn.clk.dppclk_khz = 0; - context->bw_ctx.bw.dcn.clk.z9_support = (context->bw_ctx.dml.vba.StutterPeriod > 5000.0) ? - DCN_Z9_SUPPORT_ALLOW : DCN_Z9_SUPPORT_DISALLOW; - - plane_count = 0; - for (i = 0; i < dc->res_pool->pipe_count; i++) { - if (context->res_ctx.pipe_ctx[i].plane_state) - plane_count++; - } - - if (plane_count == 0) - context->bw_ctx.bw.dcn.clk.z9_support = DCN_Z9_SUPPORT_ALLOW; + context->bw_ctx.bw.dcn.clk.zstate_support = decide_zstate_support(dc, context); context->bw_ctx.bw.dcn.clk.dtbclk_en = is_dtbclk_required(dc, context); From 6588b101ed0a71a60fa7df0a18ed7db07026d109 Mon Sep 17 00:00:00 2001 From: Lijo Lazar Date: Thu, 15 Jul 2021 14:54:49 +0800 Subject: [PATCH 648/714] drm/amd/pm: Support board calibration on aldebaran Add support for board power calibration on Aldebaran. Board calibration is done after DC offset calibration. Signed-off-by: Lijo Lazar Reviewed-by: Kevin Wang Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/pm/inc/aldebaran_ppsmc.h | 3 +- drivers/gpu/drm/amd/pm/inc/smu_types.h | 3 +- .../drm/amd/pm/swsmu/smu13/aldebaran_ppt.c | 46 +++++++++++++++---- 3 files changed, 40 insertions(+), 12 deletions(-) diff --git a/drivers/gpu/drm/amd/pm/inc/aldebaran_ppsmc.h b/drivers/gpu/drm/amd/pm/inc/aldebaran_ppsmc.h index 610266088ff1c9..35fa0d8e92dd3a 100644 --- a/drivers/gpu/drm/amd/pm/inc/aldebaran_ppsmc.h +++ b/drivers/gpu/drm/amd/pm/inc/aldebaran_ppsmc.h @@ -101,7 +101,8 @@ #define PPSMC_MSG_SetSystemVirtualSTBtoDramAddrLow 0x41 #define PPSMC_MSG_GfxDriverResetRecovery 0x42 -#define PPSMC_Message_Count 0x43 +#define PPSMC_MSG_BoardPowerCalibration 0x43 +#define PPSMC_Message_Count 0x44 //PPSMC Reset Types #define PPSMC_RESET_TYPE_WARM_RESET 0x00 diff --git a/drivers/gpu/drm/amd/pm/inc/smu_types.h b/drivers/gpu/drm/amd/pm/inc/smu_types.h index 89a16dcd0fff90..1d3765b873df4d 100644 --- a/drivers/gpu/drm/amd/pm/inc/smu_types.h +++ b/drivers/gpu/drm/amd/pm/inc/smu_types.h @@ -225,7 +225,8 @@ __SMU_DUMMY_MAP(DisableDeterminism), \ __SMU_DUMMY_MAP(SetUclkDpmMode), \ __SMU_DUMMY_MAP(LightSBR), \ - __SMU_DUMMY_MAP(GfxDriverResetRecovery), + __SMU_DUMMY_MAP(GfxDriverResetRecovery), \ + __SMU_DUMMY_MAP(BoardPowerCalibration), #undef __SMU_DUMMY_MAP #define __SMU_DUMMY_MAP(type) SMU_MSG_##type diff --git a/drivers/gpu/drm/amd/pm/swsmu/smu13/aldebaran_ppt.c b/drivers/gpu/drm/amd/pm/swsmu/smu13/aldebaran_ppt.c index 9316a726195cc0..cb5485cf243f38 100644 --- a/drivers/gpu/drm/amd/pm/swsmu/smu13/aldebaran_ppt.c +++ b/drivers/gpu/drm/amd/pm/swsmu/smu13/aldebaran_ppt.c @@ -134,6 +134,7 @@ static const struct cmn2asic_msg_mapping aldebaran_message_map[SMU_MSG_MAX_COUNT MSG_MAP(DisableDeterminism, PPSMC_MSG_DisableDeterminism, 0), MSG_MAP(SetUclkDpmMode, PPSMC_MSG_SetUclkDpmMode, 0), MSG_MAP(GfxDriverResetRecovery, PPSMC_MSG_GfxDriverResetRecovery, 0), + MSG_MAP(BoardPowerCalibration, PPSMC_MSG_BoardPowerCalibration, 0), }; static const struct cmn2asic_mapping aldebaran_clk_map[SMU_CLK_COUNT] = { @@ -440,6 +441,39 @@ static int aldebaran_setup_pptable(struct smu_context *smu) return ret; } +static bool aldebaran_is_primary(struct smu_context *smu) +{ + struct amdgpu_device *adev = smu->adev; + + if (adev->smuio.funcs && adev->smuio.funcs->get_die_id) + return adev->smuio.funcs->get_die_id(adev) == 0; + + return true; +} + +static int aldebaran_run_board_btc(struct smu_context *smu) +{ + u32 smu_version; + int ret; + + if (!aldebaran_is_primary(smu)) + return 0; + + ret = smu_cmn_get_smc_version(smu, NULL, &smu_version); + if (ret) { + dev_err(smu->adev->dev, "Failed to get smu version!\n"); + return ret; + } + if (smu_version <= 0x00441d00) + return 0; + + ret = smu_cmn_send_smc_msg(smu, SMU_MSG_BoardPowerCalibration, NULL); + if (ret) + dev_err(smu->adev->dev, "Board power calibration failed!\n"); + + return ret; +} + static int aldebaran_run_btc(struct smu_context *smu) { int ret; @@ -447,6 +481,8 @@ static int aldebaran_run_btc(struct smu_context *smu) ret = smu_cmn_send_smc_msg(smu, SMU_MSG_RunDcBtc, NULL); if (ret) dev_err(smu->adev->dev, "RunDcBtc failed!\n"); + else + ret = aldebaran_run_board_btc(smu); return ret; } @@ -524,16 +560,6 @@ static int aldebaran_freqs_in_same_level(int32_t frequency1, return (abs(frequency1 - frequency2) <= EPSILON); } -static bool aldebaran_is_primary(struct smu_context *smu) -{ - struct amdgpu_device *adev = smu->adev; - - if (adev->smuio.funcs && adev->smuio.funcs->get_die_id) - return adev->smuio.funcs->get_die_id(adev) == 0; - - return true; -} - static int aldebaran_get_smu_metrics_data(struct smu_context *smu, MetricsMember_t member, uint32_t *value) From ab7a11bd36ca6cd4d4dab2846eaacafaa5963cc1 Mon Sep 17 00:00:00 2001 From: Aaron Liu Date: Wed, 2 Jun 2021 10:32:41 +0800 Subject: [PATCH 649/714] drm/amdgpu: update yellow carp external rev_id handling 0x1681 has a different external revision id. Signed-off-by: Aaron Liu Reviewed-by: Huang Rui Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/nv.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/amd/amdgpu/nv.c b/drivers/gpu/drm/amd/amdgpu/nv.c index cf73a6923203d4..f589b8334be66b 100644 --- a/drivers/gpu/drm/amd/amdgpu/nv.c +++ b/drivers/gpu/drm/amd/amdgpu/nv.c @@ -1236,7 +1236,10 @@ static int nv_common_early_init(void *handle) AMD_PG_SUPPORT_VCN | AMD_PG_SUPPORT_VCN_DPG | AMD_PG_SUPPORT_JPEG; - adev->external_rev_id = adev->rev_id + 0x01; + if (adev->pdev->device == 0x1681) + adev->external_rev_id = adev->rev_id + 0x19; + else + adev->external_rev_id = adev->rev_id + 0x01; break; default: /* FIXME: not supported yet */ From 27f5355f5d9706dfc1c2542253689f421008c969 Mon Sep 17 00:00:00 2001 From: Aaron Liu Date: Wed, 4 Nov 2020 13:04:06 +0800 Subject: [PATCH 650/714] drm/amdgpu: add yellow carp pci id (v2) Add Yellow Carp PCI id support. v2: add another DID Signed-off-by: Aaron Liu Reviewed-by: Huang Rui Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c index abb928894eac0d..361b86b71b561e 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c @@ -1190,6 +1190,10 @@ static const struct pci_device_id pciidlist[] = { /* Van Gogh */ {0x1002, 0x163F, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_VANGOGH|AMD_IS_APU}, + /* Yellow Carp */ + {0x1002, 0x164D, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_YELLOW_CARP|AMD_IS_APU}, + {0x1002, 0x1681, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_YELLOW_CARP|AMD_IS_APU}, + /* Navy_Flounder */ {0x1002, 0x73C0, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_NAVY_FLOUNDER}, {0x1002, 0x73C1, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_NAVY_FLOUNDER}, From 6be50f5d83adc9541de3d5be26e968182b5ac150 Mon Sep 17 00:00:00 2001 From: Stylon Wang Date: Wed, 21 Jul 2021 12:25:24 +0800 Subject: [PATCH 651/714] drm/amd/display: Fix ASSR regression on embedded panels [Why] Regression found in some embedded panels traces back to the earliest upstreamed ASSR patch. The changed code flow are causing problems with some panels. [How] - Change ASSR enabling code while preserving original code flow as much as possible - Simplify the code on guarding with internal display flag Bug: https://bugzilla.kernel.org/show_bug.cgi?id=213779 Bug: https://gitlab.freedesktop.org/drm/amd/-/issues/1620 Reviewed-by: Alex Deucher Signed-off-by: Stylon Wang Signed-off-by: Alex Deucher Cc: stable@vger.kernel.org --- drivers/gpu/drm/amd/display/dc/core/dc_link_dp.c | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/drivers/gpu/drm/amd/display/dc/core/dc_link_dp.c b/drivers/gpu/drm/amd/display/dc/core/dc_link_dp.c index 12066f5a53fc7c..9fb8c46dc60698 100644 --- a/drivers/gpu/drm/amd/display/dc/core/dc_link_dp.c +++ b/drivers/gpu/drm/amd/display/dc/core/dc_link_dp.c @@ -1820,8 +1820,7 @@ bool perform_link_training_with_retries( */ panel_mode = DP_PANEL_MODE_DEFAULT; } - } else - panel_mode = DP_PANEL_MODE_DEFAULT; + } } #endif @@ -4650,7 +4649,10 @@ enum dp_panel_mode dp_get_panel_mode(struct dc_link *link) } } - if (link->dpcd_caps.panel_mode_edp) { + if (link->dpcd_caps.panel_mode_edp && + (link->connector_signal == SIGNAL_TYPE_EDP || + (link->connector_signal == SIGNAL_TYPE_DISPLAY_PORT && + link->is_internal_display))) { return DP_PANEL_MODE_EDP; } From 02dc2ee7c7476dd831df63d2b10cc0a162a531f1 Mon Sep 17 00:00:00 2001 From: Xin Long Date: Wed, 21 Jul 2021 14:45:54 -0400 Subject: [PATCH 652/714] sctp: do not update transport pathmtu if SPP_PMTUD_ENABLE is not set Currently, in sctp_packet_config(), sctp_transport_pmtu_check() is called to update transport pathmtu with dst's mtu when dst's mtu has been changed by non sctp stack like xfrm. However, this should only happen when SPP_PMTUD_ENABLE is set, no matter where dst's mtu changed. This patch is to fix by checking SPP_PMTUD_ENABLE flag before calling sctp_transport_pmtu_check(). Thanks Jacek for reporting and looking into this issue. v1->v2: - add the missing "{" to fix the build error. Fixes: 69fec325a643 ('Revert "sctp: remove sctp_transport_pmtu_check"') Reported-by: Jacek Szafraniec Tested-by: Jacek Szafraniec Signed-off-by: Xin Long Acked-by: Marcelo Ricardo Leitner Signed-off-by: David S. Miller --- net/sctp/output.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/net/sctp/output.c b/net/sctp/output.c index 9032ce60d50e81..4dfb5ea82b05b0 100644 --- a/net/sctp/output.c +++ b/net/sctp/output.c @@ -104,8 +104,8 @@ void sctp_packet_config(struct sctp_packet *packet, __u32 vtag, if (asoc->param_flags & SPP_PMTUD_ENABLE) sctp_assoc_sync_pmtu(asoc); } else if (!sctp_transport_pl_enabled(tp) && - !sctp_transport_pmtu_check(tp)) { - if (asoc->param_flags & SPP_PMTUD_ENABLE) + asoc->param_flags & SPP_PMTUD_ENABLE) { + if (!sctp_transport_pmtu_check(tp)) sctp_assoc_sync_pmtu(asoc); } From d80cded9cc25f841d5250d2e94a7b42be1e81c97 Mon Sep 17 00:00:00 2001 From: Veerabadhran Gopalakrishnan Date: Mon, 19 Jul 2021 19:06:23 +0530 Subject: [PATCH 653/714] drm/amdgpu - Corrected the video codecs array name for yellow carp Signed-off-by: Veerabadhran Gopalakrishnan Reviewed-by: James Zhu Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/nv.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/nv.c b/drivers/gpu/drm/amd/amdgpu/nv.c index f589b8334be66b..94d029dbf30da5 100644 --- a/drivers/gpu/drm/amd/amdgpu/nv.c +++ b/drivers/gpu/drm/amd/amdgpu/nv.c @@ -173,8 +173,8 @@ static const struct amdgpu_video_codec_info yc_video_codecs_decode_array[] = { }; static const struct amdgpu_video_codecs yc_video_codecs_decode = { - .codec_count = ARRAY_SIZE(bg_video_codecs_decode_array), - .codec_array = bg_video_codecs_decode_array, + .codec_count = ARRAY_SIZE(yc_video_codecs_decode_array), + .codec_array = yc_video_codecs_decode_array, }; static int nv_query_video_codecs(struct amdgpu_device *adev, bool encode, From d0e4dae74470fb709fc0ab61862c317938f4cc4d Mon Sep 17 00:00:00 2001 From: Bin Meng Date: Sun, 27 Jun 2021 21:51:17 +0800 Subject: [PATCH 654/714] riscv: Fix 32-bit RISC-V boot failure Commit dd2d082b5760 ("riscv: Cleanup setup_bootmem()") adjusted the calling sequence in setup_bootmem(), which invalidates the fix commit de043da0b9e7 ("RISC-V: Fix usage of memblock_enforce_memory_limit") did for 32-bit RISC-V unfortunately. So now 32-bit RISC-V does not boot again when testing booting kernel on QEMU 'virt' with '-m 2G', which was exactly what the original commit de043da0b9e7 ("RISC-V: Fix usage of memblock_enforce_memory_limit") tried to fix. Fixes: dd2d082b5760 ("riscv: Cleanup setup_bootmem()") Signed-off-by: Bin Meng Signed-off-by: Palmer Dabbelt --- arch/riscv/mm/init.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/arch/riscv/mm/init.c b/arch/riscv/mm/init.c index 4faf8bd157eaa9..0fc72603c699d0 100644 --- a/arch/riscv/mm/init.c +++ b/arch/riscv/mm/init.c @@ -123,7 +123,7 @@ void __init setup_bootmem(void) { phys_addr_t vmlinux_end = __pa_symbol(&_end); phys_addr_t vmlinux_start = __pa_symbol(&_start); - phys_addr_t dram_end = memblock_end_of_DRAM(); + phys_addr_t dram_end; phys_addr_t max_mapped_addr = __pa(~(ulong)0); #ifdef CONFIG_XIP_KERNEL @@ -146,6 +146,8 @@ void __init setup_bootmem(void) #endif memblock_reserve(vmlinux_start, vmlinux_end - vmlinux_start); + dram_end = memblock_end_of_DRAM(); + /* * memblock allocator is not aware of the fact that last 4K bytes of * the addressable memory can not be mapped because of IS_ERR_VALUE From 213ad73d06073b197a02476db3a4998e219ddb06 Mon Sep 17 00:00:00 2001 From: Wei Wang Date: Wed, 21 Jul 2021 10:27:38 -0700 Subject: [PATCH 655/714] tcp: disable TFO blackhole logic by default Multiple complaints have been raised from the TFO users on the internet stating that the TFO blackhole logic is too aggressive and gets falsely triggered too often. (e.g. https://blog.apnic.net/2021/07/05/tcp-fast-open-not-so-fast/) Considering that most middleboxes no longer drop TFO packets, we decide to disable the blackhole logic by setting /proc/sys/net/ipv4/tcp_fastopen_blackhole_timeout_set to 0 by default. Fixes: cf1ef3f0719b4 ("net/tcp_fastopen: Disable active side TFO in certain scenarios") Signed-off-by: Wei Wang Signed-off-by: Eric Dumazet Acked-by: Neal Cardwell Acked-by: Soheil Hassas Yeganeh Acked-by: Yuchung Cheng Signed-off-by: David S. Miller --- Documentation/networking/ip-sysctl.rst | 2 +- net/ipv4/tcp_fastopen.c | 9 ++++++++- net/ipv4/tcp_ipv4.c | 2 +- 3 files changed, 10 insertions(+), 3 deletions(-) diff --git a/Documentation/networking/ip-sysctl.rst b/Documentation/networking/ip-sysctl.rst index b3fa522e4cd9d4..316c7dfa9693a3 100644 --- a/Documentation/networking/ip-sysctl.rst +++ b/Documentation/networking/ip-sysctl.rst @@ -826,7 +826,7 @@ tcp_fastopen_blackhole_timeout_sec - INTEGER initial value when the blackhole issue goes away. 0 to disable the blackhole detection. - By default, it is set to 1hr. + By default, it is set to 0 (feature is disabled). tcp_fastopen_key - list of comma separated 32-digit hexadecimal INTEGERs The list consists of a primary key and an optional backup key. The diff --git a/net/ipv4/tcp_fastopen.c b/net/ipv4/tcp_fastopen.c index b32af76e213253..25fa4c01a17f63 100644 --- a/net/ipv4/tcp_fastopen.c +++ b/net/ipv4/tcp_fastopen.c @@ -507,6 +507,9 @@ void tcp_fastopen_active_disable(struct sock *sk) { struct net *net = sock_net(sk); + if (!sock_net(sk)->ipv4.sysctl_tcp_fastopen_blackhole_timeout) + return; + /* Paired with READ_ONCE() in tcp_fastopen_active_should_disable() */ WRITE_ONCE(net->ipv4.tfo_active_disable_stamp, jiffies); @@ -526,10 +529,14 @@ void tcp_fastopen_active_disable(struct sock *sk) bool tcp_fastopen_active_should_disable(struct sock *sk) { unsigned int tfo_bh_timeout = sock_net(sk)->ipv4.sysctl_tcp_fastopen_blackhole_timeout; - int tfo_da_times = atomic_read(&sock_net(sk)->ipv4.tfo_active_disable_times); unsigned long timeout; + int tfo_da_times; int multiplier; + if (!tfo_bh_timeout) + return false; + + tfo_da_times = atomic_read(&sock_net(sk)->ipv4.tfo_active_disable_times); if (!tfo_da_times) return false; diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c index b9dc2d6197be8b..a692626c19e443 100644 --- a/net/ipv4/tcp_ipv4.c +++ b/net/ipv4/tcp_ipv4.c @@ -2965,7 +2965,7 @@ static int __net_init tcp_sk_init(struct net *net) net->ipv4.sysctl_tcp_comp_sack_nr = 44; net->ipv4.sysctl_tcp_fastopen = TFO_CLIENT_ENABLE; spin_lock_init(&net->ipv4.tcp_fastopen_ctx_lock); - net->ipv4.sysctl_tcp_fastopen_blackhole_timeout = 60 * 60; + net->ipv4.sysctl_tcp_fastopen_blackhole_timeout = 0; atomic_set(&net->ipv4.tfo_active_disable_times, 0); /* Reno is always built in */ From e40cba9490bab1414d45c2d62defc0ad4f6e4136 Mon Sep 17 00:00:00 2001 From: Vladimir Oltean Date: Wed, 21 Jul 2021 15:37:59 +0300 Subject: [PATCH 656/714] net: dsa: sja1105: make VID 4095 a bridge VLAN too This simple series of commands: ip link add br0 type bridge vlan_filtering 1 ip link set swp0 master br0 fails on sja1105 with the following error: [ 33.439103] sja1105 spi0.1: vlan-lookup-table needs to have at least the default untagged VLAN [ 33.447710] sja1105 spi0.1: Invalid config, cannot upload Warning: sja1105: Failed to change VLAN Ethertype. For context, sja1105 has 3 operating modes: - SJA1105_VLAN_UNAWARE: the dsa_8021q_vlans are committed to hardware - SJA1105_VLAN_FILTERING_FULL: the bridge_vlans are committed to hardware - SJA1105_VLAN_FILTERING_BEST_EFFORT: both the dsa_8021q_vlans and the bridge_vlans are committed to hardware Swapping out a VLAN list and another in happens in sja1105_build_vlan_table(), which performs a delta update procedure. That function is called from a few places, notably from sja1105_vlan_filtering() which is called from the SWITCHDEV_ATTR_ID_BRIDGE_VLAN_FILTERING handler. The above set of 2 commands fails when run on a kernel pre-commit 8841f6e63f2c ("net: dsa: sja1105: make devlink property best_effort_vlan_filtering true by default"). So the priv->vlan_state transition that takes place is between VLAN-unaware and full VLAN filtering. So the dsa_8021q_vlans are swapped out and the bridge_vlans are swapped in. So why does it fail? Well, the bridge driver, through nbp_vlan_init(), first sets up the SWITCHDEV_ATTR_ID_BRIDGE_VLAN_FILTERING attribute, and only then proceeds to call nbp_vlan_add for the default_pvid. So when we swap out the dsa_8021q_vlans and swap in the bridge_vlans in the SWITCHDEV_ATTR_ID_BRIDGE_VLAN_FILTERING handler, there are no bridge VLANs (yet). So we have wiped the VLAN table clean, and the low-level static config checker complains of an invalid configuration. We _will_ add the bridge VLANs using the dynamic config interface, albeit later, when nbp_vlan_add() calls us. So it is natural that it fails. So why did it ever work? Surprisingly, it looks like I only tested this configuration with 2 things set up in a particular way: - a network manager that brings all ports up - a kernel with CONFIG_VLAN_8021Q=y It is widely known that commit ad1afb003939 ("vlan_dev: VLAN 0 should be treated as "no vlan tag" (802.1p packet)") installs VID 0 to every net device that comes up. DSA treats these VLANs as bridge VLANs, and therefore, in my testing, the list of bridge_vlans was never empty. However, if CONFIG_VLAN_8021Q is not enabled, or the port is not up when it joins a VLAN-aware bridge, the bridge_vlans list will be temporarily empty, and the sja1105_static_config_reload() call from sja1105_vlan_filtering() will fail. To fix this, the simplest thing is to keep VID 4095, the one used for CPU-injected control packets since commit ed040abca4c1 ("net: dsa: sja1105: use 4095 as the private VLAN for untagged traffic"), in the list of bridge VLANs too, not just the list of tag_8021q VLANs. This ensures that the list of bridge VLANs will never be empty. Fixes: ec5ae61076d0 ("net: dsa: sja1105: save/restore VLANs using a delta commit method") Reported-by: Radu Pirea (NXP OSS) Signed-off-by: Vladimir Oltean Signed-off-by: David S. Miller --- drivers/net/dsa/sja1105/sja1105_main.c | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/drivers/net/dsa/sja1105/sja1105_main.c b/drivers/net/dsa/sja1105/sja1105_main.c index ced8c9cb29c297..e2dc997580a82c 100644 --- a/drivers/net/dsa/sja1105/sja1105_main.c +++ b/drivers/net/dsa/sja1105/sja1105_main.c @@ -397,6 +397,12 @@ static int sja1105_init_static_vlan(struct sja1105_private *priv) if (dsa_is_cpu_port(ds, port)) v->pvid = true; list_add(&v->list, &priv->dsa_8021q_vlans); + + v = kmemdup(v, sizeof(*v), GFP_KERNEL); + if (!v) + return -ENOMEM; + + list_add(&v->list, &priv->bridge_vlans); } ((struct sja1105_vlan_lookup_entry *)table->entries)[0] = pvid; From 291d0a2c1fa6ff437c8f1156646fdd2525714c80 Mon Sep 17 00:00:00 2001 From: Biju Das Date: Wed, 21 Jul 2021 19:17:21 +0100 Subject: [PATCH 657/714] ravb: Fix a typo in comment Fix the typo RX->TX in comment, as the code following the comment process TX and not RX. Signed-off-by: Biju Das Reviewed-by: Lad Prabhakar Reviewed-by: Sergei Shtylyov Signed-off-by: David S. Miller --- drivers/net/ethernet/renesas/ravb_main.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/ethernet/renesas/ravb_main.c b/drivers/net/ethernet/renesas/ravb_main.c index 69c50f81e1cb29..805397088850dc 100644 --- a/drivers/net/ethernet/renesas/ravb_main.c +++ b/drivers/net/ethernet/renesas/ravb_main.c @@ -920,7 +920,7 @@ static int ravb_poll(struct napi_struct *napi, int budget) if (ravb_rx(ndev, "a, q)) goto out; - /* Processing RX Descriptor Ring */ + /* Processing TX Descriptor Ring */ spin_lock_irqsave(&priv->lock, flags); /* Clear TX interrupt */ ravb_write(ndev, ~(mask | TIS_RESERVED), TIS); From 9f061b9acbb0bdf5317b301c1608ca55be522c92 Mon Sep 17 00:00:00 2001 From: Biju Das Date: Wed, 21 Jul 2021 19:21:26 +0100 Subject: [PATCH 658/714] ravb: Remove extra TAB Align the member description comments for struct ravb_desc by removing the extra TAB. Signed-off-by: Biju Das Reviewed-by: Lad Prabhakar Reviewed-by: Sergei Shtylyov Signed-off-by: David S. Miller --- drivers/net/ethernet/renesas/ravb.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/ethernet/renesas/ravb.h b/drivers/net/ethernet/renesas/ravb.h index 86a1eb0634e80a..80e62ca2e3d32f 100644 --- a/drivers/net/ethernet/renesas/ravb.h +++ b/drivers/net/ethernet/renesas/ravb.h @@ -864,7 +864,7 @@ enum GECMR_BIT { /* The Ethernet AVB descriptor definitions. */ struct ravb_desc { - __le16 ds; /* Descriptor size */ + __le16 ds; /* Descriptor size */ u8 cc; /* Content control MSBs (reserved) */ u8 die_dt; /* Descriptor interrupt enable and type */ __le32 dptr; /* Descriptor pointer */ From c79e89ecaa246c880292ba68cbe08c9c30db77e3 Mon Sep 17 00:00:00 2001 From: Heinrich Schuchardt Date: Tue, 29 Jun 2021 15:40:18 +0200 Subject: [PATCH 659/714] RISC-V: load initrd wherever it fits into memory Requiring that initrd is loaded below RAM start + 256 MiB led to failure to boot SUSE Linux with GRUB on QEMU, cf. https://lists.gnu.org/archive/html/grub-devel/2021-06/msg00037.html Remove the constraint. Reported-by: Andreas Schwab Signed-off-by: Heinrich Schuchardt Reviewed-by: Atish Patra Acked-by: Ard Biesheuvel Fixes: d7071743db31 ("RISC-V: Add EFI stub support.") Signed-off-by: Palmer Dabbelt --- arch/riscv/include/asm/efi.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/arch/riscv/include/asm/efi.h b/arch/riscv/include/asm/efi.h index 6d98cd999680bb..7b3483ba2e847a 100644 --- a/arch/riscv/include/asm/efi.h +++ b/arch/riscv/include/asm/efi.h @@ -27,10 +27,10 @@ int efi_set_mapping_permissions(struct mm_struct *mm, efi_memory_desc_t *md); #define ARCH_EFI_IRQ_FLAGS_MASK (SR_IE | SR_SPIE) -/* Load initrd at enough distance from DRAM start */ +/* Load initrd anywhere in system RAM */ static inline unsigned long efi_get_max_initrd_addr(unsigned long image_addr) { - return image_addr + SZ_256M; + return ULONG_MAX; } #define alloc_screen_info(x...) (&screen_info) From 21cf377a9c40658777ecbd6242be449a19a84e44 Mon Sep 17 00:00:00 2001 From: Lino Sanfilippo Date: Wed, 21 Jul 2021 23:56:41 +0200 Subject: [PATCH 660/714] net: dsa: ensure linearized SKBs in case of tail taggers The function skb_put() that is used by tail taggers to make room for the DSA tag must only be called for linearized SKBS. However in case that the slave device inherited features like NETIF_F_HW_SG or NETIF_F_FRAGLIST the SKB passed to the slaves transmit function may not be linearized. Avoid those SKBs by clearing the NETIF_F_HW_SG and NETIF_F_FRAGLIST flags for tail taggers. Furthermore since the tagging protocol can be changed at runtime move the code for setting up the slaves features into dsa_slave_setup_tagger(). Suggested-by: Vladimir Oltean Signed-off-by: Lino Sanfilippo Reviewed-by: Florian Fainelli Signed-off-by: David S. Miller --- net/dsa/slave.c | 14 +++++++++----- 1 file changed, 9 insertions(+), 5 deletions(-) diff --git a/net/dsa/slave.c b/net/dsa/slave.c index ffbba1e7155155..532085da8d8fb1 100644 --- a/net/dsa/slave.c +++ b/net/dsa/slave.c @@ -1808,6 +1808,7 @@ void dsa_slave_setup_tagger(struct net_device *slave) struct dsa_slave_priv *p = netdev_priv(slave); const struct dsa_port *cpu_dp = dp->cpu_dp; struct net_device *master = cpu_dp->master; + const struct dsa_switch *ds = dp->ds; slave->needed_headroom = cpu_dp->tag_ops->needed_headroom; slave->needed_tailroom = cpu_dp->tag_ops->needed_tailroom; @@ -1819,6 +1820,14 @@ void dsa_slave_setup_tagger(struct net_device *slave) slave->needed_tailroom += master->needed_tailroom; p->xmit = cpu_dp->tag_ops->xmit; + + slave->features = master->vlan_features | NETIF_F_HW_TC; + if (ds->ops->port_vlan_add && ds->ops->port_vlan_del) + slave->features |= NETIF_F_HW_VLAN_CTAG_FILTER; + slave->hw_features |= NETIF_F_HW_TC; + slave->features |= NETIF_F_LLTX; + if (slave->needed_tailroom) + slave->features &= ~(NETIF_F_SG | NETIF_F_FRAGLIST); } static struct lock_class_key dsa_slave_netdev_xmit_lock_key; @@ -1881,11 +1890,6 @@ int dsa_slave_create(struct dsa_port *port) if (slave_dev == NULL) return -ENOMEM; - slave_dev->features = master->vlan_features | NETIF_F_HW_TC; - if (ds->ops->port_vlan_add && ds->ops->port_vlan_del) - slave_dev->features |= NETIF_F_HW_VLAN_CTAG_FILTER; - slave_dev->hw_features |= NETIF_F_HW_TC; - slave_dev->features |= NETIF_F_LLTX; slave_dev->ethtool_ops = &dsa_slave_ethtool_ops; if (!is_zero_ether_addr(port->mac)) ether_addr_copy(slave_dev->dev_addr, port->mac); From 37120f23ac8998c250573ea3247ff77426551f69 Mon Sep 17 00:00:00 2001 From: Lino Sanfilippo Date: Wed, 21 Jul 2021 23:56:42 +0200 Subject: [PATCH 661/714] net: dsa: tag_ksz: dont let the hardware process the layer 4 checksum If the checksum calculation is offloaded to the network device (e.g due to NETIF_F_HW_CSUM inherited from the DSA master device), the calculated layer 4 checksum is incorrect. This is since the DSA tag which is placed after the layer 4 data is considered as being part of the daa and thus errorneously included into the checksum calculation. To avoid this, always calculate the layer 4 checksum in software. Signed-off-by: Lino Sanfilippo Reviewed-by: Florian Fainelli Signed-off-by: David S. Miller --- net/dsa/tag_ksz.c | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/net/dsa/tag_ksz.c b/net/dsa/tag_ksz.c index 53565f48934c0f..a201ccf2435d8d 100644 --- a/net/dsa/tag_ksz.c +++ b/net/dsa/tag_ksz.c @@ -53,6 +53,9 @@ static struct sk_buff *ksz8795_xmit(struct sk_buff *skb, struct net_device *dev) u8 *tag; u8 *addr; + if (skb->ip_summed == CHECKSUM_PARTIAL && skb_checksum_help(skb)) + return NULL; + /* Tag encoding */ tag = skb_put(skb, KSZ_INGRESS_TAG_LEN); addr = skb_mac_header(skb); @@ -114,6 +117,9 @@ static struct sk_buff *ksz9477_xmit(struct sk_buff *skb, u8 *addr; u16 val; + if (skb->ip_summed == CHECKSUM_PARTIAL && skb_checksum_help(skb)) + return NULL; + /* Tag encoding */ tag = skb_put(skb, KSZ9477_INGRESS_TAG_LEN); addr = skb_mac_header(skb); @@ -164,6 +170,9 @@ static struct sk_buff *ksz9893_xmit(struct sk_buff *skb, u8 *addr; u8 *tag; + if (skb->ip_summed == CHECKSUM_PARTIAL && skb_checksum_help(skb)) + return NULL; + /* Tag encoding */ tag = skb_put(skb, KSZ_INGRESS_TAG_LEN); addr = skb_mac_header(skb); From b0084afde27fe8a504377dee65f55bc6aa776937 Mon Sep 17 00:00:00 2001 From: Alexander Tsoy Date: Thu, 22 Jul 2021 02:56:05 +0300 Subject: [PATCH 662/714] ALSA: usb-audio: Add registration quirk for JBL Quantum headsets MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit These devices has two interfaces, but only the second interface contains the capture endpoint, thus quirk is required to delay the registration until the second interface appears. Tested-by: Jakub Fišer Signed-off-by: Alexander Tsoy Cc: Link: https://lore.kernel.org/r/20210721235605.53741-1-alexander@tsoy.me Signed-off-by: Takashi Iwai --- sound/usb/quirks.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/sound/usb/quirks.c b/sound/usb/quirks.c index 8b8bee3c3dd635..e7accd87e0632d 100644 --- a/sound/usb/quirks.c +++ b/sound/usb/quirks.c @@ -1897,6 +1897,9 @@ static const struct registration_quirk registration_quirks[] = { REG_QUIRK_ENTRY(0x0951, 0x16d8, 2), /* Kingston HyperX AMP */ REG_QUIRK_ENTRY(0x0951, 0x16ed, 2), /* Kingston HyperX Cloud Alpha S */ REG_QUIRK_ENTRY(0x0951, 0x16ea, 2), /* Kingston HyperX Cloud Flight S */ + REG_QUIRK_ENTRY(0x0ecb, 0x1f46, 2), /* JBL Quantum 600 */ + REG_QUIRK_ENTRY(0x0ecb, 0x2039, 2), /* JBL Quantum 400 */ + REG_QUIRK_ENTRY(0x0ecb, 0x203e, 2), /* JBL Quantum 800 */ { 0 } /* terminator */ }; From 98c5b13f3a878066741a907a9d0f1f388556ed5c Mon Sep 17 00:00:00 2001 From: Randy Dunlap Date: Wed, 21 Jul 2021 15:33:36 -0700 Subject: [PATCH 663/714] net: sparx5: fix unmet dependencies warning WARNING: unmet direct dependencies detected for PHY_SPARX5_SERDES Depends on [n]: (ARCH_SPARX5 || COMPILE_TEST [=n]) && OF [=y] && HAS_IOMEM [=y] Selected by [y]: - SPARX5_SWITCH [=y] && NETDEVICES [=y] && ETHERNET [=y] && NET_VENDOR_MICROCHIP [=y] && NET_SWITCHDEV [=y] && HAS_IOMEM [=y] && OF [=y] Signed-off-by: Randy Dunlap Cc: Lars Povlsen Cc: Steen Hegelund Cc: UNGLinuxDriver@microchip.com Cc: "David S. Miller" Cc: Jakub Kicinski Signed-off-by: David S. Miller --- drivers/net/ethernet/microchip/sparx5/Kconfig | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/net/ethernet/microchip/sparx5/Kconfig b/drivers/net/ethernet/microchip/sparx5/Kconfig index ac403d43c74c07..7bdbb2d09a1482 100644 --- a/drivers/net/ethernet/microchip/sparx5/Kconfig +++ b/drivers/net/ethernet/microchip/sparx5/Kconfig @@ -3,6 +3,7 @@ config SPARX5_SWITCH depends on NET_SWITCHDEV depends on HAS_IOMEM depends on OF + depends on ARCH_SPARX5 || COMPILE_TEST select PHYLINK select PHY_SPARX5_SERDES select RESET_CONTROLLER From 9d85a6f44bd5585761947f40f7821c9cd78a1bbe Mon Sep 17 00:00:00 2001 From: Yajun Deng Date: Thu, 22 Jul 2021 11:23:43 +0800 Subject: [PATCH 664/714] net: sched: cls_api: Fix the the wrong parameter The 4th parameter in tc_chain_notify() should be flags rather than seq. Let's change it back correctly. Fixes: 32a4f5ecd738 ("net: sched: introduce chain object to uapi") Signed-off-by: Yajun Deng Signed-off-by: David S. Miller --- net/sched/cls_api.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/sched/cls_api.c b/net/sched/cls_api.c index d73b5c5514a9fa..e3e79e9bd7067d 100644 --- a/net/sched/cls_api.c +++ b/net/sched/cls_api.c @@ -2904,7 +2904,7 @@ static int tc_ctl_chain(struct sk_buff *skb, struct nlmsghdr *n, break; case RTM_GETCHAIN: err = tc_chain_notify(chain, skb, n->nlmsg_seq, - n->nlmsg_seq, n->nlmsg_type, true); + n->nlmsg_flags, n->nlmsg_type, true); if (err < 0) NL_SET_ERR_MSG(extack, "Failed to send chain notify message"); break; From 7bbcb919e32d776ca8ddce08abb391ab92eef6a9 Mon Sep 17 00:00:00 2001 From: Maxime Ripard Date: Tue, 20 Jul 2021 15:45:23 +0200 Subject: [PATCH 665/714] drm/panel: raspberrypi-touchscreen: Prevent double-free The mipi_dsi_device allocated by mipi_dsi_device_register_full() is already free'd on release. Fixes: 2f733d6194bd ("drm/panel: Add support for the Raspberry Pi 7" Touchscreen.") Signed-off-by: Maxime Ripard Reviewed-by: Sam Ravnborg Link: https://patchwork.freedesktop.org/patch/msgid/20210720134525.563936-9-maxime@cerno.tech --- drivers/gpu/drm/panel/panel-raspberrypi-touchscreen.c | 1 - 1 file changed, 1 deletion(-) diff --git a/drivers/gpu/drm/panel/panel-raspberrypi-touchscreen.c b/drivers/gpu/drm/panel/panel-raspberrypi-touchscreen.c index 2229f1af2ca8cb..46029c5610c808 100644 --- a/drivers/gpu/drm/panel/panel-raspberrypi-touchscreen.c +++ b/drivers/gpu/drm/panel/panel-raspberrypi-touchscreen.c @@ -447,7 +447,6 @@ static int rpi_touchscreen_remove(struct i2c_client *i2c) drm_panel_remove(&ts->base); mipi_dsi_device_unregister(ts->dsi); - kfree(ts->dsi); return 0; } From 7aaa0f311e2df2704fa8ddb8ed681a3b5841d0bf Mon Sep 17 00:00:00 2001 From: Ioana Ciornei Date: Thu, 22 Jul 2021 15:15:51 +0300 Subject: [PATCH 666/714] dpaa2-switch: seed the buffer pool after allocating the swp Any interraction with the buffer pool (seeding a buffer, acquire one) is made through a software portal (SWP, a DPIO object). There are circumstances where the dpaa2-switch driver probes on a DPSW before any DPIO devices have been probed. In this case, seeding of the buffer pool will lead to a panic since no SWPs are initialized. To fix this, seed the buffer pool after making sure that the software portals have been probed and are ready to be used. Fixes: 0b1b71370458 ("staging: dpaa2-switch: handle Rx path on control interface") Signed-off-by: Ioana Ciornei Signed-off-by: David S. Miller --- .../net/ethernet/freescale/dpaa2/dpaa2-switch.c | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/drivers/net/ethernet/freescale/dpaa2/dpaa2-switch.c b/drivers/net/ethernet/freescale/dpaa2/dpaa2-switch.c index f3d12d0714fb53..68b78642c045d4 100644 --- a/drivers/net/ethernet/freescale/dpaa2/dpaa2-switch.c +++ b/drivers/net/ethernet/freescale/dpaa2/dpaa2-switch.c @@ -2770,32 +2770,32 @@ static int dpaa2_switch_ctrl_if_setup(struct ethsw_core *ethsw) if (err) return err; - err = dpaa2_switch_seed_bp(ethsw); - if (err) - goto err_free_dpbp; - err = dpaa2_switch_alloc_rings(ethsw); if (err) - goto err_drain_dpbp; + goto err_free_dpbp; err = dpaa2_switch_setup_dpio(ethsw); if (err) goto err_destroy_rings; + err = dpaa2_switch_seed_bp(ethsw); + if (err) + goto err_deregister_dpio; + err = dpsw_ctrl_if_enable(ethsw->mc_io, 0, ethsw->dpsw_handle); if (err) { dev_err(ethsw->dev, "dpsw_ctrl_if_enable err %d\n", err); - goto err_deregister_dpio; + goto err_drain_dpbp; } return 0; +err_drain_dpbp: + dpaa2_switch_drain_bp(ethsw); err_deregister_dpio: dpaa2_switch_free_dpio(ethsw); err_destroy_rings: dpaa2_switch_destroy_rings(ethsw); -err_drain_dpbp: - dpaa2_switch_drain_bp(ethsw); err_free_dpbp: dpaa2_switch_free_dpbp(ethsw); From 9acc8103ab594f72250788cb45a43427f36d685d Mon Sep 17 00:00:00 2001 From: Filipe Manana Date: Tue, 6 Jul 2021 15:41:15 +0100 Subject: [PATCH 667/714] btrfs: fix unpersisted i_size on fsync after expanding truncate If we have an inode that does not have the full sync flag set, was changed in the current transaction, then it is logged while logging some other inode (like its parent directory for example), its i_size is increased by a truncate operation, the log is synced through an fsync of some other inode and then finally we explicitly call fsync on our inode, the new i_size is not persisted. The following example shows how to trigger it, with comments explaining how and why the issue happens: $ mkfs.btrfs -f /dev/sdc $ mount /dev/sdc /mnt $ touch /mnt/foo $ xfs_io -f -c "pwrite -S 0xab 0 1M" /mnt/bar $ sync # Fsync bar, this will be a noop since the file has not yet been # modified in the current transaction. The goal here is to clear # BTRFS_INODE_NEEDS_FULL_SYNC from the inode's runtime flags. $ xfs_io -c "fsync" /mnt/bar # Now rename both files, without changing their parent directory. $ mv /mnt/bar /mnt/bar2 $ mv /mnt/foo /mnt/foo2 # Increase the size of bar2 with a truncate operation. $ xfs_io -c "truncate 2M" /mnt/bar2 # Now fsync foo2, this results in logging its parent inode (the root # directory), and logging the parent results in logging the inode of # file bar2 (its inode item and the new name). The inode of file bar2 # is logged with an i_size of 0 bytes since it's logged in # LOG_INODE_EXISTS mode, meaning we are only logging its names (and # xattrs if it had any) and the i_size of the inode will not be changed # when the log is replayed. $ xfs_io -c "fsync" /mnt/foo2 # Now explicitly fsync bar2. This resulted in doing nothing, not # logging the inode with the new i_size of 2M and the hole from file # offset 1M to 2M. Because the inode did not have the flag # BTRFS_INODE_NEEDS_FULL_SYNC set, when it was logged through the # fsync of file foo2, its last_log_commit field was updated, # resulting in this explicit of file bar2 not doing anything. $ xfs_io -c "fsync" /mnt/bar2 # File bar2 content and size before a power failure. $ od -A d -t x1 /mnt/bar2 0000000 ab ab ab ab ab ab ab ab ab ab ab ab ab ab ab ab * 1048576 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 * 2097152 # Mount the filesystem to replay the log. $ mount /dev/sdc /mnt # Read the file again, should have the same content and size as before # the power failure happened, but it doesn't, i_size is still at 1M. $ od -A d -t x1 /mnt/bar2 0000000 ab ab ab ab ab ab ab ab ab ab ab ab ab ab ab ab * 1048576 This started to happen after commit 209ecbb8585bf6 ("btrfs: remove stale comment and logic from btrfs_inode_in_log()"), since btrfs_inode_in_log() no longer checks if the inode's list of modified extents is not empty. However, checking that list is not the right way to address this case and the check was added long time ago in commit 125c4cf9f37c98 ("Btrfs: set inode's logged_trans/last_log_commit after ranged fsync") for a different purpose, to address consecutive ranged fsyncs. The reason that checking for the list emptiness makes this test pass is because during an expanding truncate we create an extent map to represent a hole from the old i_size to the new i_size, and add that extent map to the list of modified extents in the inode. However if we are low on available memory and we can not allocate a new extent map, then we don't treat it as an error and just set the full sync flag on the inode, so that the next fsync does not rely on the list of modified extents - so checking for the emptiness of the list to decide if the inode needs to be logged is not reliable, and results in not logging the inode if it was not possible to allocate the extent map for the hole. Fix this by ensuring that if we are only logging that an inode exists (inode item, names/references and xattrs), we don't update the inode's last_log_commit even if it does not have the full sync runtime flag set. A test case for fstests follows soon. CC: stable@vger.kernel.org # 5.13+ Signed-off-by: Filipe Manana Signed-off-by: David Sterba --- fs/btrfs/tree-log.c | 31 ++++++++++++++++++++++--------- 1 file changed, 22 insertions(+), 9 deletions(-) diff --git a/fs/btrfs/tree-log.c b/fs/btrfs/tree-log.c index dc6eb088d73e34..9fd0348be7f5e7 100644 --- a/fs/btrfs/tree-log.c +++ b/fs/btrfs/tree-log.c @@ -5526,16 +5526,29 @@ static int btrfs_log_inode(struct btrfs_trans_handle *trans, spin_lock(&inode->lock); inode->logged_trans = trans->transid; /* - * Don't update last_log_commit if we logged that an inode exists - * after it was loaded to memory (full_sync bit set). - * This is to prevent data loss when we do a write to the inode, - * then the inode gets evicted after all delalloc was flushed, - * then we log it exists (due to a rename for example) and then - * fsync it. This last fsync would do nothing (not logging the - * extents previously written). + * Don't update last_log_commit if we logged that an inode exists. + * We do this for two reasons: + * + * 1) We might have had buffered writes to this inode that were + * flushed and had their ordered extents completed in this + * transaction, but we did not previously log the inode with + * LOG_INODE_ALL. Later the inode was evicted and after that + * it was loaded again and this LOG_INODE_EXISTS log operation + * happened. We must make sure that if an explicit fsync against + * the inode is performed later, it logs the new extents, an + * updated inode item, etc, and syncs the log. The same logic + * applies to direct IO writes instead of buffered writes. + * + * 2) When we log the inode with LOG_INODE_EXISTS, its inode item + * is logged with an i_size of 0 or whatever value was logged + * before. If later the i_size of the inode is increased by a + * truncate operation, the log is synced through an fsync of + * some other inode and then finally an explicit fsync against + * this inode is made, we must make sure this fsync logs the + * inode with the new i_size, the hole between old i_size and + * the new i_size, and syncs the log. */ - if (inode_only != LOG_INODE_EXISTS || - !test_bit(BTRFS_INODE_NEEDS_FULL_SYNC, &inode->runtime_flags)) + if (inode_only != LOG_INODE_EXISTS) inode->last_log_commit = inode->last_sub_trans; spin_unlock(&inode->lock); } From 16a200f66ede3f9afa2e51d90ade017aaa18d213 Mon Sep 17 00:00:00 2001 From: Anand Jain Date: Sun, 4 Jul 2021 19:14:39 +0800 Subject: [PATCH 668/714] btrfs: check for missing device in btrfs_trim_fs A fstrim on a degraded raid1 can trigger the following null pointer dereference: BTRFS info (device loop0): allowing degraded mounts BTRFS info (device loop0): disk space caching is enabled BTRFS info (device loop0): has skinny extents BTRFS warning (device loop0): devid 2 uuid 97ac16f7-e14d-4db1-95bc-3d489b424adb is missing BTRFS warning (device loop0): devid 2 uuid 97ac16f7-e14d-4db1-95bc-3d489b424adb is missing BTRFS info (device loop0): enabling ssd optimizations BUG: kernel NULL pointer dereference, address: 0000000000000620 PGD 0 P4D 0 Oops: 0000 [#1] SMP NOPTI CPU: 0 PID: 4574 Comm: fstrim Not tainted 5.13.0-rc7+ #31 Hardware name: innotek GmbH VirtualBox/VirtualBox, BIOS VirtualBox 12/01/2006 RIP: 0010:btrfs_trim_fs+0x199/0x4a0 [btrfs] RSP: 0018:ffff959541797d28 EFLAGS: 00010293 RAX: 0000000000000000 RBX: ffff946f84eca508 RCX: a7a67937adff8608 RDX: ffff946e8122d000 RSI: 0000000000000000 RDI: ffffffffc02fdbf0 RBP: ffff946ea4615000 R08: 0000000000000001 R09: 0000000000000000 R10: 0000000000000000 R11: ffff946e8122d960 R12: 0000000000000000 R13: ffff959541797db8 R14: ffff946e8122d000 R15: ffff959541797db8 FS: 00007f55917a5080(0000) GS:ffff946f9bc00000(0000) knlGS:0000000000000000 CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 CR2: 0000000000000620 CR3: 000000002d2c8001 CR4: 00000000000706f0 Call Trace: btrfs_ioctl_fitrim+0x167/0x260 [btrfs] btrfs_ioctl+0x1c00/0x2fe0 [btrfs] ? selinux_file_ioctl+0x140/0x240 ? syscall_trace_enter.constprop.0+0x188/0x240 ? __x64_sys_ioctl+0x83/0xb0 __x64_sys_ioctl+0x83/0xb0 Reproducer: $ mkfs.btrfs -fq -d raid1 -m raid1 /dev/loop0 /dev/loop1 $ mount /dev/loop0 /btrfs $ umount /btrfs $ btrfs dev scan --forget $ mount -o degraded /dev/loop0 /btrfs $ fstrim /btrfs The reason is we call btrfs_trim_free_extents() for the missing device, which uses device->bdev (NULL for missing device) to find if the device supports discard. Fix is to check if the device is missing before calling btrfs_trim_free_extents(). CC: stable@vger.kernel.org # 5.4+ Reviewed-by: Filipe Manana Signed-off-by: Anand Jain Reviewed-by: David Sterba Signed-off-by: David Sterba --- fs/btrfs/extent-tree.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c index d296483d148fdf..268ce58d45697d 100644 --- a/fs/btrfs/extent-tree.c +++ b/fs/btrfs/extent-tree.c @@ -6019,6 +6019,9 @@ int btrfs_trim_fs(struct btrfs_fs_info *fs_info, struct fstrim_range *range) mutex_lock(&fs_info->fs_devices->device_list_mutex); devices = &fs_info->fs_devices->devices; list_for_each_entry(device, devices, dev_list) { + if (test_bit(BTRFS_DEV_STATE_MISSING, &device->dev_state)) + continue; + ret = btrfs_trim_free_extents(device, &group_trimmed); if (ret) { dev_failed++; From 8949b9a114019b03fbd0d03d65b8647cba4feef3 Mon Sep 17 00:00:00 2001 From: Filipe Manana Date: Wed, 21 Jul 2021 17:31:48 +0100 Subject: [PATCH 669/714] btrfs: fix lock inversion problem when doing qgroup extent tracing At btrfs_qgroup_trace_extent_post() we call btrfs_find_all_roots() with a NULL value as the transaction handle argument, which makes that function take the commit_root_sem semaphore, which is necessary when we don't hold a transaction handle or any other mechanism to prevent a transaction commit from wiping out commit roots. However btrfs_qgroup_trace_extent_post() can be called in a context where we are holding a write lock on an extent buffer from a subvolume tree, namely from btrfs_truncate_inode_items(), called either during truncate or unlink operations. In this case we end up with a lock inversion problem because the commit_root_sem is a higher level lock, always supposed to be acquired before locking any extent buffer. Lockdep detects this lock inversion problem since we switched the extent buffer locks from custom locks to semaphores, and when running btrfs/158 from fstests, it reported the following trace: [ 9057.626435] ====================================================== [ 9057.627541] WARNING: possible circular locking dependency detected [ 9057.628334] 5.14.0-rc2-btrfs-next-93 #1 Not tainted [ 9057.628961] ------------------------------------------------------ [ 9057.629867] kworker/u16:4/30781 is trying to acquire lock: [ 9057.630824] ffff8e2590f58760 (btrfs-tree-00){++++}-{3:3}, at: __btrfs_tree_read_lock+0x24/0x110 [btrfs] [ 9057.632542] but task is already holding lock: [ 9057.633551] ffff8e25582d4b70 (&fs_info->commit_root_sem){++++}-{3:3}, at: iterate_extent_inodes+0x10b/0x280 [btrfs] [ 9057.635255] which lock already depends on the new lock. [ 9057.636292] the existing dependency chain (in reverse order) is: [ 9057.637240] -> #1 (&fs_info->commit_root_sem){++++}-{3:3}: [ 9057.638138] down_read+0x46/0x140 [ 9057.638648] btrfs_find_all_roots+0x41/0x80 [btrfs] [ 9057.639398] btrfs_qgroup_trace_extent_post+0x37/0x70 [btrfs] [ 9057.640283] btrfs_add_delayed_data_ref+0x418/0x490 [btrfs] [ 9057.641114] btrfs_free_extent+0x35/0xb0 [btrfs] [ 9057.641819] btrfs_truncate_inode_items+0x424/0xf70 [btrfs] [ 9057.642643] btrfs_evict_inode+0x454/0x4f0 [btrfs] [ 9057.643418] evict+0xcf/0x1d0 [ 9057.643895] do_unlinkat+0x1e9/0x300 [ 9057.644525] do_syscall_64+0x3b/0xc0 [ 9057.645110] entry_SYSCALL_64_after_hwframe+0x44/0xae [ 9057.645835] -> #0 (btrfs-tree-00){++++}-{3:3}: [ 9057.646600] __lock_acquire+0x130e/0x2210 [ 9057.647248] lock_acquire+0xd7/0x310 [ 9057.647773] down_read_nested+0x4b/0x140 [ 9057.648350] __btrfs_tree_read_lock+0x24/0x110 [btrfs] [ 9057.649175] btrfs_read_lock_root_node+0x31/0x40 [btrfs] [ 9057.650010] btrfs_search_slot+0x537/0xc00 [btrfs] [ 9057.650849] scrub_print_warning_inode+0x89/0x370 [btrfs] [ 9057.651733] iterate_extent_inodes+0x1e3/0x280 [btrfs] [ 9057.652501] scrub_print_warning+0x15d/0x2f0 [btrfs] [ 9057.653264] scrub_handle_errored_block.isra.0+0x135f/0x1640 [btrfs] [ 9057.654295] scrub_bio_end_io_worker+0x101/0x2e0 [btrfs] [ 9057.655111] btrfs_work_helper+0xf8/0x400 [btrfs] [ 9057.655831] process_one_work+0x247/0x5a0 [ 9057.656425] worker_thread+0x55/0x3c0 [ 9057.656993] kthread+0x155/0x180 [ 9057.657494] ret_from_fork+0x22/0x30 [ 9057.658030] other info that might help us debug this: [ 9057.659064] Possible unsafe locking scenario: [ 9057.659824] CPU0 CPU1 [ 9057.660402] ---- ---- [ 9057.660988] lock(&fs_info->commit_root_sem); [ 9057.661581] lock(btrfs-tree-00); [ 9057.662348] lock(&fs_info->commit_root_sem); [ 9057.663254] lock(btrfs-tree-00); [ 9057.663690] *** DEADLOCK *** [ 9057.664437] 4 locks held by kworker/u16:4/30781: [ 9057.665023] #0: ffff8e25922a1148 ((wq_completion)btrfs-scrub){+.+.}-{0:0}, at: process_one_work+0x1c7/0x5a0 [ 9057.666260] #1: ffffabb3451ffe70 ((work_completion)(&work->normal_work)){+.+.}-{0:0}, at: process_one_work+0x1c7/0x5a0 [ 9057.667639] #2: ffff8e25922da198 (&ret->mutex){+.+.}-{3:3}, at: scrub_handle_errored_block.isra.0+0x5d2/0x1640 [btrfs] [ 9057.669017] #3: ffff8e25582d4b70 (&fs_info->commit_root_sem){++++}-{3:3}, at: iterate_extent_inodes+0x10b/0x280 [btrfs] [ 9057.670408] stack backtrace: [ 9057.670976] CPU: 7 PID: 30781 Comm: kworker/u16:4 Not tainted 5.14.0-rc2-btrfs-next-93 #1 [ 9057.672030] Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS rel-1.14.0-0-g155821a1990b-prebuilt.qemu.org 04/01/2014 [ 9057.673492] Workqueue: btrfs-scrub btrfs_work_helper [btrfs] [ 9057.674258] Call Trace: [ 9057.674588] dump_stack_lvl+0x57/0x72 [ 9057.675083] check_noncircular+0xf3/0x110 [ 9057.675611] __lock_acquire+0x130e/0x2210 [ 9057.676132] lock_acquire+0xd7/0x310 [ 9057.676605] ? __btrfs_tree_read_lock+0x24/0x110 [btrfs] [ 9057.677313] ? lock_is_held_type+0xe8/0x140 [ 9057.677849] down_read_nested+0x4b/0x140 [ 9057.678349] ? __btrfs_tree_read_lock+0x24/0x110 [btrfs] [ 9057.679068] __btrfs_tree_read_lock+0x24/0x110 [btrfs] [ 9057.679760] btrfs_read_lock_root_node+0x31/0x40 [btrfs] [ 9057.680458] btrfs_search_slot+0x537/0xc00 [btrfs] [ 9057.681083] ? _raw_spin_unlock+0x29/0x40 [ 9057.681594] ? btrfs_find_all_roots_safe+0x11f/0x140 [btrfs] [ 9057.682336] scrub_print_warning_inode+0x89/0x370 [btrfs] [ 9057.683058] ? btrfs_find_all_roots_safe+0x11f/0x140 [btrfs] [ 9057.683834] ? scrub_write_block_to_dev_replace+0xb0/0xb0 [btrfs] [ 9057.684632] iterate_extent_inodes+0x1e3/0x280 [btrfs] [ 9057.685316] scrub_print_warning+0x15d/0x2f0 [btrfs] [ 9057.685977] ? ___ratelimit+0xa4/0x110 [ 9057.686460] scrub_handle_errored_block.isra.0+0x135f/0x1640 [btrfs] [ 9057.687316] scrub_bio_end_io_worker+0x101/0x2e0 [btrfs] [ 9057.688021] btrfs_work_helper+0xf8/0x400 [btrfs] [ 9057.688649] ? lock_is_held_type+0xe8/0x140 [ 9057.689180] process_one_work+0x247/0x5a0 [ 9057.689696] worker_thread+0x55/0x3c0 [ 9057.690175] ? process_one_work+0x5a0/0x5a0 [ 9057.690731] kthread+0x155/0x180 [ 9057.691158] ? set_kthread_struct+0x40/0x40 [ 9057.691697] ret_from_fork+0x22/0x30 Fix this by making btrfs_find_all_roots() never attempt to lock the commit_root_sem when it is called from btrfs_qgroup_trace_extent_post(). We can't just pass a non-NULL transaction handle to btrfs_find_all_roots() from btrfs_qgroup_trace_extent_post(), because that would make backref lookup not use commit roots and acquire read locks on extent buffers, and therefore could deadlock when btrfs_qgroup_trace_extent_post() is called from the btrfs_truncate_inode_items() code path which has acquired a write lock on an extent buffer of the subvolume btree. CC: stable@vger.kernel.org # 4.19+ Reviewed-by: Qu Wenruo Signed-off-by: Filipe Manana Signed-off-by: David Sterba --- fs/btrfs/backref.c | 6 +++--- fs/btrfs/backref.h | 3 ++- fs/btrfs/delayed-ref.c | 4 ++-- fs/btrfs/qgroup.c | 38 +++++++++++++++++++++++++++-------- fs/btrfs/qgroup.h | 2 +- fs/btrfs/tests/qgroup-tests.c | 20 +++++++++--------- 6 files changed, 48 insertions(+), 25 deletions(-) diff --git a/fs/btrfs/backref.c b/fs/btrfs/backref.c index 7a8a2fc195338c..78b202d198b8e1 100644 --- a/fs/btrfs/backref.c +++ b/fs/btrfs/backref.c @@ -1488,15 +1488,15 @@ static int btrfs_find_all_roots_safe(struct btrfs_trans_handle *trans, int btrfs_find_all_roots(struct btrfs_trans_handle *trans, struct btrfs_fs_info *fs_info, u64 bytenr, u64 time_seq, struct ulist **roots, - bool ignore_offset) + bool ignore_offset, bool skip_commit_root_sem) { int ret; - if (!trans) + if (!trans && !skip_commit_root_sem) down_read(&fs_info->commit_root_sem); ret = btrfs_find_all_roots_safe(trans, fs_info, bytenr, time_seq, roots, ignore_offset); - if (!trans) + if (!trans && !skip_commit_root_sem) up_read(&fs_info->commit_root_sem); return ret; } diff --git a/fs/btrfs/backref.h b/fs/btrfs/backref.h index 17abde7f794ce3..ff5f07f9940bd9 100644 --- a/fs/btrfs/backref.h +++ b/fs/btrfs/backref.h @@ -47,7 +47,8 @@ int btrfs_find_all_leafs(struct btrfs_trans_handle *trans, const u64 *extent_item_pos, bool ignore_offset); int btrfs_find_all_roots(struct btrfs_trans_handle *trans, struct btrfs_fs_info *fs_info, u64 bytenr, - u64 time_seq, struct ulist **roots, bool ignore_offset); + u64 time_seq, struct ulist **roots, bool ignore_offset, + bool skip_commit_root_sem); char *btrfs_ref_to_path(struct btrfs_root *fs_root, struct btrfs_path *path, u32 name_len, unsigned long name_off, struct extent_buffer *eb_in, u64 parent, diff --git a/fs/btrfs/delayed-ref.c b/fs/btrfs/delayed-ref.c index 06bc842ecdb34e..ca848b1834747c 100644 --- a/fs/btrfs/delayed-ref.c +++ b/fs/btrfs/delayed-ref.c @@ -974,7 +974,7 @@ int btrfs_add_delayed_tree_ref(struct btrfs_trans_handle *trans, kmem_cache_free(btrfs_delayed_tree_ref_cachep, ref); if (qrecord_inserted) - btrfs_qgroup_trace_extent_post(fs_info, record); + btrfs_qgroup_trace_extent_post(trans, record); return 0; } @@ -1069,7 +1069,7 @@ int btrfs_add_delayed_data_ref(struct btrfs_trans_handle *trans, if (qrecord_inserted) - return btrfs_qgroup_trace_extent_post(fs_info, record); + return btrfs_qgroup_trace_extent_post(trans, record); return 0; } diff --git a/fs/btrfs/qgroup.c b/fs/btrfs/qgroup.c index 07ec06d4e97263..0fa121171ca17f 100644 --- a/fs/btrfs/qgroup.c +++ b/fs/btrfs/qgroup.c @@ -1704,17 +1704,39 @@ int btrfs_qgroup_trace_extent_nolock(struct btrfs_fs_info *fs_info, return 0; } -int btrfs_qgroup_trace_extent_post(struct btrfs_fs_info *fs_info, +int btrfs_qgroup_trace_extent_post(struct btrfs_trans_handle *trans, struct btrfs_qgroup_extent_record *qrecord) { struct ulist *old_root; u64 bytenr = qrecord->bytenr; int ret; - ret = btrfs_find_all_roots(NULL, fs_info, bytenr, 0, &old_root, false); + /* + * We are always called in a context where we are already holding a + * transaction handle. Often we are called when adding a data delayed + * reference from btrfs_truncate_inode_items() (truncating or unlinking), + * in which case we will be holding a write lock on extent buffer from a + * subvolume tree. In this case we can't allow btrfs_find_all_roots() to + * acquire fs_info->commit_root_sem, because that is a higher level lock + * that must be acquired before locking any extent buffers. + * + * So we want btrfs_find_all_roots() to not acquire the commit_root_sem + * but we can't pass it a non-NULL transaction handle, because otherwise + * it would not use commit roots and would lock extent buffers, causing + * a deadlock if it ends up trying to read lock the same extent buffer + * that was previously write locked at btrfs_truncate_inode_items(). + * + * So pass a NULL transaction handle to btrfs_find_all_roots() and + * explicitly tell it to not acquire the commit_root_sem - if we are + * holding a transaction handle we don't need its protection. + */ + ASSERT(trans != NULL); + + ret = btrfs_find_all_roots(NULL, trans->fs_info, bytenr, 0, &old_root, + false, true); if (ret < 0) { - fs_info->qgroup_flags |= BTRFS_QGROUP_STATUS_FLAG_INCONSISTENT; - btrfs_warn(fs_info, + trans->fs_info->qgroup_flags |= BTRFS_QGROUP_STATUS_FLAG_INCONSISTENT; + btrfs_warn(trans->fs_info, "error accounting new delayed refs extent (err code: %d), quota inconsistent", ret); return 0; @@ -1758,7 +1780,7 @@ int btrfs_qgroup_trace_extent(struct btrfs_trans_handle *trans, u64 bytenr, kfree(record); return 0; } - return btrfs_qgroup_trace_extent_post(fs_info, record); + return btrfs_qgroup_trace_extent_post(trans, record); } int btrfs_qgroup_trace_leaf_items(struct btrfs_trans_handle *trans, @@ -2629,7 +2651,7 @@ int btrfs_qgroup_account_extents(struct btrfs_trans_handle *trans) /* Search commit root to find old_roots */ ret = btrfs_find_all_roots(NULL, fs_info, record->bytenr, 0, - &record->old_roots, false); + &record->old_roots, false, false); if (ret < 0) goto cleanup; } @@ -2645,7 +2667,7 @@ int btrfs_qgroup_account_extents(struct btrfs_trans_handle *trans) * current root. It's safe inside commit_transaction(). */ ret = btrfs_find_all_roots(trans, fs_info, - record->bytenr, BTRFS_SEQ_LAST, &new_roots, false); + record->bytenr, BTRFS_SEQ_LAST, &new_roots, false, false); if (ret < 0) goto cleanup; if (qgroup_to_skip) { @@ -3179,7 +3201,7 @@ static int qgroup_rescan_leaf(struct btrfs_trans_handle *trans, num_bytes = found.offset; ret = btrfs_find_all_roots(NULL, fs_info, found.objectid, 0, - &roots, false); + &roots, false, false); if (ret < 0) goto out; /* For rescan, just pass old_roots as NULL */ diff --git a/fs/btrfs/qgroup.h b/fs/btrfs/qgroup.h index 7283e4f549af74..880e9df0dac1d7 100644 --- a/fs/btrfs/qgroup.h +++ b/fs/btrfs/qgroup.h @@ -298,7 +298,7 @@ int btrfs_qgroup_trace_extent_nolock( * using current root, then we can move all expensive backref walk out of * transaction committing, but not now as qgroup accounting will be wrong again. */ -int btrfs_qgroup_trace_extent_post(struct btrfs_fs_info *fs_info, +int btrfs_qgroup_trace_extent_post(struct btrfs_trans_handle *trans, struct btrfs_qgroup_extent_record *qrecord); /* diff --git a/fs/btrfs/tests/qgroup-tests.c b/fs/btrfs/tests/qgroup-tests.c index f3137285a9e2df..98b5aaba46f165 100644 --- a/fs/btrfs/tests/qgroup-tests.c +++ b/fs/btrfs/tests/qgroup-tests.c @@ -224,7 +224,7 @@ static int test_no_shared_qgroup(struct btrfs_root *root, * quota. */ ret = btrfs_find_all_roots(&trans, fs_info, nodesize, 0, &old_roots, - false); + false, false); if (ret) { ulist_free(old_roots); test_err("couldn't find old roots: %d", ret); @@ -237,7 +237,7 @@ static int test_no_shared_qgroup(struct btrfs_root *root, return ret; ret = btrfs_find_all_roots(&trans, fs_info, nodesize, 0, &new_roots, - false); + false, false); if (ret) { ulist_free(old_roots); ulist_free(new_roots); @@ -261,7 +261,7 @@ static int test_no_shared_qgroup(struct btrfs_root *root, new_roots = NULL; ret = btrfs_find_all_roots(&trans, fs_info, nodesize, 0, &old_roots, - false); + false, false); if (ret) { ulist_free(old_roots); test_err("couldn't find old roots: %d", ret); @@ -273,7 +273,7 @@ static int test_no_shared_qgroup(struct btrfs_root *root, return -EINVAL; ret = btrfs_find_all_roots(&trans, fs_info, nodesize, 0, &new_roots, - false); + false, false); if (ret) { ulist_free(old_roots); ulist_free(new_roots); @@ -325,7 +325,7 @@ static int test_multiple_refs(struct btrfs_root *root, } ret = btrfs_find_all_roots(&trans, fs_info, nodesize, 0, &old_roots, - false); + false, false); if (ret) { ulist_free(old_roots); test_err("couldn't find old roots: %d", ret); @@ -338,7 +338,7 @@ static int test_multiple_refs(struct btrfs_root *root, return ret; ret = btrfs_find_all_roots(&trans, fs_info, nodesize, 0, &new_roots, - false); + false, false); if (ret) { ulist_free(old_roots); ulist_free(new_roots); @@ -360,7 +360,7 @@ static int test_multiple_refs(struct btrfs_root *root, } ret = btrfs_find_all_roots(&trans, fs_info, nodesize, 0, &old_roots, - false); + false, false); if (ret) { ulist_free(old_roots); test_err("couldn't find old roots: %d", ret); @@ -373,7 +373,7 @@ static int test_multiple_refs(struct btrfs_root *root, return ret; ret = btrfs_find_all_roots(&trans, fs_info, nodesize, 0, &new_roots, - false); + false, false); if (ret) { ulist_free(old_roots); ulist_free(new_roots); @@ -401,7 +401,7 @@ static int test_multiple_refs(struct btrfs_root *root, } ret = btrfs_find_all_roots(&trans, fs_info, nodesize, 0, &old_roots, - false); + false, false); if (ret) { ulist_free(old_roots); test_err("couldn't find old roots: %d", ret); @@ -414,7 +414,7 @@ static int test_multiple_refs(struct btrfs_root *root, return ret; ret = btrfs_find_all_roots(&trans, fs_info, nodesize, 0, &new_roots, - false); + false, false); if (ret) { ulist_free(old_roots); ulist_free(new_roots); From c7c3a6dcb1efd52949acc1e640be9aad1206a13a Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Thu, 22 Jul 2021 09:53:59 +0200 Subject: [PATCH 670/714] btrfs: store a block_device in struct btrfs_ordered_extent Store the block device instead of the gendisk in the btrfs_ordered_extent structure instead of acquiring a reference to it later. Note: this is from series removing bdgrab/bdput, btrfs is one of the last users. Signed-off-by: Christoph Hellwig Reviewed-by: David Sterba Signed-off-by: David Sterba --- fs/btrfs/inode.c | 2 +- fs/btrfs/ordered-data.c | 2 -- fs/btrfs/ordered-data.h | 3 +-- fs/btrfs/zoned.c | 12 ++++-------- 4 files changed, 6 insertions(+), 13 deletions(-) diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c index 8f60314c36c55e..0117d867ecf876 100644 --- a/fs/btrfs/inode.c +++ b/fs/btrfs/inode.c @@ -2992,7 +2992,7 @@ static int btrfs_finish_ordered_io(struct btrfs_ordered_extent *ordered_extent) goto out; } - if (ordered_extent->disk) + if (ordered_extent->bdev) btrfs_rewrite_logical_zoned(ordered_extent); btrfs_free_io_failure_record(inode, start, end); diff --git a/fs/btrfs/ordered-data.c b/fs/btrfs/ordered-data.c index 6eb41b7c0c8439..5c0f8481e25e06 100644 --- a/fs/btrfs/ordered-data.c +++ b/fs/btrfs/ordered-data.c @@ -190,8 +190,6 @@ static int __btrfs_add_ordered_extent(struct btrfs_inode *inode, u64 file_offset entry->truncated_len = (u64)-1; entry->qgroup_rsv = ret; entry->physical = (u64)-1; - entry->disk = NULL; - entry->partno = (u8)-1; ASSERT(type == BTRFS_ORDERED_REGULAR || type == BTRFS_ORDERED_NOCOW || diff --git a/fs/btrfs/ordered-data.h b/fs/btrfs/ordered-data.h index 566472004edd33..b2d88aba8420cc 100644 --- a/fs/btrfs/ordered-data.h +++ b/fs/btrfs/ordered-data.h @@ -145,8 +145,7 @@ struct btrfs_ordered_extent { * command in a workqueue context */ u64 physical; - struct gendisk *disk; - u8 partno; + struct block_device *bdev; }; /* diff --git a/fs/btrfs/zoned.c b/fs/btrfs/zoned.c index 297c0b1c0634d1..907c2cc45c9cbc 100644 --- a/fs/btrfs/zoned.c +++ b/fs/btrfs/zoned.c @@ -1349,8 +1349,7 @@ void btrfs_record_physical_zoned(struct inode *inode, u64 file_offset, return; ordered->physical = physical; - ordered->disk = bio->bi_bdev->bd_disk; - ordered->partno = bio->bi_bdev->bd_partno; + ordered->bdev = bio->bi_bdev; btrfs_put_ordered_extent(ordered); } @@ -1362,18 +1361,16 @@ void btrfs_rewrite_logical_zoned(struct btrfs_ordered_extent *ordered) struct extent_map_tree *em_tree; struct extent_map *em; struct btrfs_ordered_sum *sum; - struct block_device *bdev; u64 orig_logical = ordered->disk_bytenr; u64 *logical = NULL; int nr, stripe_len; /* Zoned devices should not have partitions. So, we can assume it is 0 */ - ASSERT(ordered->partno == 0); - bdev = bdgrab(ordered->disk->part0); - if (WARN_ON(!bdev)) + ASSERT(!bdev_is_partition(ordered->bdev)); + if (WARN_ON(!ordered->bdev)) return; - if (WARN_ON(btrfs_rmap_block(fs_info, orig_logical, bdev, + if (WARN_ON(btrfs_rmap_block(fs_info, orig_logical, ordered->bdev, ordered->physical, &logical, &nr, &stripe_len))) goto out; @@ -1402,7 +1399,6 @@ void btrfs_rewrite_logical_zoned(struct btrfs_ordered_extent *ordered) out: kfree(logical); - bdput(bdev); } bool btrfs_check_meta_write_pointer(struct btrfs_fs_info *fs_info, From 67f0d6d9883c13174669f88adac4f0ee656cc16a Mon Sep 17 00:00:00 2001 From: Haoran Luo Date: Wed, 21 Jul 2021 14:12:07 +0000 Subject: [PATCH 671/714] tracing: Fix bug in rb_per_cpu_empty() that might cause deadloop. The "rb_per_cpu_empty()" misinterpret the condition (as not-empty) when "head_page" and "commit_page" of "struct ring_buffer_per_cpu" points to the same buffer page, whose "buffer_data_page" is empty and "read" field is non-zero. An error scenario could be constructed as followed (kernel perspective): 1. All pages in the buffer has been accessed by reader(s) so that all of them will have non-zero "read" field. 2. Read and clear all buffer pages so that "rb_num_of_entries()" will return 0 rendering there's no more data to read. It is also required that the "read_page", "commit_page" and "tail_page" points to the same page, while "head_page" is the next page of them. 3. Invoke "ring_buffer_lock_reserve()" with large enough "length" so that it shot pass the end of current tail buffer page. Now the "head_page", "commit_page" and "tail_page" points to the same page. 4. Discard current event with "ring_buffer_discard_commit()", so that "head_page", "commit_page" and "tail_page" points to a page whose buffer data page is now empty. When the error scenario has been constructed, "tracing_read_pipe" will be trapped inside a deadloop: "trace_empty()" returns 0 since "rb_per_cpu_empty()" returns 0 when it hits the CPU containing such constructed ring buffer. Then "trace_find_next_entry_inc()" always return NULL since "rb_num_of_entries()" reports there's no more entry to read. Finally "trace_seq_to_user()" returns "-EBUSY" spanking "tracing_read_pipe" back to the start of the "waitagain" loop. I've also written a proof-of-concept script to construct the scenario and trigger the bug automatically, you can use it to trace and validate my reasoning above: https://github.com/aegistudio/RingBufferDetonator.git Tests has been carried out on linux kernel 5.14-rc2 (2734d6c1b1a089fb593ef6a23d4b70903526fe0c), my fixed version of kernel (for testing whether my update fixes the bug) and some older kernels (for range of affected kernels). Test result is also attached to the proof-of-concept repository. Link: https://lore.kernel.org/linux-trace-devel/YPaNxsIlb2yjSi5Y@aegistudio/ Link: https://lore.kernel.org/linux-trace-devel/YPgrN85WL9VyrZ55@aegistudio Cc: stable@vger.kernel.org Fixes: bf41a158cacba ("ring-buffer: make reentrant") Suggested-by: Linus Torvalds Signed-off-by: Haoran Luo Signed-off-by: Steven Rostedt (VMware) --- kernel/trace/ring_buffer.c | 28 ++++++++++++++++++++++++---- 1 file changed, 24 insertions(+), 4 deletions(-) diff --git a/kernel/trace/ring_buffer.c b/kernel/trace/ring_buffer.c index d1463eac11a36f..e592d1df6f888a 100644 --- a/kernel/trace/ring_buffer.c +++ b/kernel/trace/ring_buffer.c @@ -3880,10 +3880,30 @@ static bool rb_per_cpu_empty(struct ring_buffer_per_cpu *cpu_buffer) if (unlikely(!head)) return true; - return reader->read == rb_page_commit(reader) && - (commit == reader || - (commit == head && - head->read == rb_page_commit(commit))); + /* Reader should exhaust content in reader page */ + if (reader->read != rb_page_commit(reader)) + return false; + + /* + * If writers are committing on the reader page, knowing all + * committed content has been read, the ring buffer is empty. + */ + if (commit == reader) + return true; + + /* + * If writers are committing on a page other than reader page + * and head page, there should always be content to read. + */ + if (commit != head) + return false; + + /* + * Writers are committing on the head page, we just need + * to care about there're committed data, and the reader will + * swap reader page with head page when it is to read data. + */ + return rb_page_commit(commit) == 0; } /** From 2485bd7557a7edb4520b4072af464f0a08c8efe0 Mon Sep 17 00:00:00 2001 From: Ronnie Sahlberg Date: Thu, 22 Jul 2021 14:53:32 +1000 Subject: [PATCH 672/714] cifs: only write 64kb at a time when fallocating a small region of a file We only allow sending single credit writes through the SMB2_write() synchronous api so split this into smaller chunks. Fixes: 966a3cb7c7db ("cifs: improve fallocate emulation") Signed-off-by: Ronnie Sahlberg Reported-by: Namjae Jeon Signed-off-by: Steve French --- fs/cifs/smb2ops.c | 26 +++++++++++++++++++------- 1 file changed, 19 insertions(+), 7 deletions(-) diff --git a/fs/cifs/smb2ops.c b/fs/cifs/smb2ops.c index ba3c58e1f72565..5cefb5972396e2 100644 --- a/fs/cifs/smb2ops.c +++ b/fs/cifs/smb2ops.c @@ -3617,7 +3617,7 @@ static int smb3_simple_fallocate_write_range(unsigned int xid, char *buf) { struct cifs_io_parms io_parms = {0}; - int nbytes; + int rc, nbytes; struct kvec iov[2]; io_parms.netfid = cfile->fid.netfid; @@ -3625,13 +3625,25 @@ static int smb3_simple_fallocate_write_range(unsigned int xid, io_parms.tcon = tcon; io_parms.persistent_fid = cfile->fid.persistent_fid; io_parms.volatile_fid = cfile->fid.volatile_fid; - io_parms.offset = off; - io_parms.length = len; - /* iov[0] is reserved for smb header */ - iov[1].iov_base = buf; - iov[1].iov_len = io_parms.length; - return SMB2_write(xid, &io_parms, &nbytes, iov, 1); + while (len) { + io_parms.offset = off; + io_parms.length = len; + if (io_parms.length > SMB2_MAX_BUFFER_SIZE) + io_parms.length = SMB2_MAX_BUFFER_SIZE; + /* iov[0] is reserved for smb header */ + iov[1].iov_base = buf; + iov[1].iov_len = io_parms.length; + rc = SMB2_write(xid, &io_parms, &nbytes, iov, 1); + if (rc) + break; + if (nbytes > len) + return -EINVAL; + buf += nbytes; + off += nbytes; + len -= nbytes; + } + return rc; } static int smb3_simple_fallocate_range(unsigned int xid, From b62366181a5e9473e9c10e98f400049491c55876 Mon Sep 17 00:00:00 2001 From: Paulo Alcantara Date: Fri, 16 Jul 2021 03:26:41 -0300 Subject: [PATCH 673/714] cifs: support share failover when remounting When remouting a DFS share, force a new DFS referral of the path and if the currently cached targets do not match any of the new targets or there was no cached targets, then mark it for reconnect. For example: $ mount //dom/dfs/link /mnt -o username=foo,password=bar $ ls /mnt oldfile.txt change target share of 'link' in server settings $ mount /mnt -o remount,username=foo,password=bar $ ls /mnt newfile.txt Signed-off-by: Paulo Alcantara (SUSE) Signed-off-by: Steve French --- fs/cifs/connect.c | 4 +- fs/cifs/dfs_cache.c | 229 ++++++++++++++++++++++++++++++++++++------- fs/cifs/dfs_cache.h | 3 + fs/cifs/fs_context.c | 7 ++ 4 files changed, 203 insertions(+), 40 deletions(-) diff --git a/fs/cifs/connect.c b/fs/cifs/connect.c index 1b04d6ec14ddac..3781eee9360aff 100644 --- a/fs/cifs/connect.c +++ b/fs/cifs/connect.c @@ -220,7 +220,7 @@ cifs_reconnect(struct TCP_Server_Info *server) #ifdef CONFIG_CIFS_DFS_UPCALL struct super_block *sb = NULL; struct cifs_sb_info *cifs_sb = NULL; - struct dfs_cache_tgt_list tgt_list = {0}; + struct dfs_cache_tgt_list tgt_list = DFS_CACHE_TGT_LIST_INIT(tgt_list); struct dfs_cache_tgt_iterator *tgt_it = NULL; #endif @@ -3130,7 +3130,7 @@ static int do_dfs_failover(const char *path, const char *full_path, struct cifs_ { int rc; char *npath = NULL; - struct dfs_cache_tgt_list tgt_list = {0}; + struct dfs_cache_tgt_list tgt_list = DFS_CACHE_TGT_LIST_INIT(tgt_list); struct dfs_cache_tgt_iterator *tgt_it = NULL; struct smb3_fs_context tmp_ctx = {NULL}; diff --git a/fs/cifs/dfs_cache.c b/fs/cifs/dfs_cache.c index 7c1769714609bd..28374559284413 100644 --- a/fs/cifs/dfs_cache.c +++ b/fs/cifs/dfs_cache.c @@ -19,6 +19,7 @@ #include "cifs_debug.h" #include "cifs_unicode.h" #include "smb2glob.h" +#include "dns_resolve.h" #include "dfs_cache.h" @@ -911,6 +912,7 @@ static int get_targets(struct cache_entry *ce, struct dfs_cache_tgt_list *tl) err_free_it: list_for_each_entry_safe(it, nit, head, it_list) { + list_del(&it->it_list); kfree(it->it_name); kfree(it); } @@ -1293,6 +1295,194 @@ int dfs_cache_get_tgt_share(char *path, const struct dfs_cache_tgt_iterator *it, return 0; } +static bool target_share_equal(struct TCP_Server_Info *server, const char *s1, const char *s2) +{ + char unc[sizeof("\\\\") + SERVER_NAME_LENGTH] = {0}; + const char *host; + size_t hostlen; + char *ip = NULL; + struct sockaddr sa; + bool match; + int rc; + + if (strcasecmp(s1, s2)) + return false; + + /* + * Resolve share's hostname and check if server address matches. Otherwise just ignore it + * as we could not have upcall to resolve hostname or failed to convert ip address. + */ + match = true; + extract_unc_hostname(s1, &host, &hostlen); + scnprintf(unc, sizeof(unc), "\\\\%.*s", (int)hostlen, host); + + rc = dns_resolve_server_name_to_ip(unc, &ip, NULL); + if (rc < 0) { + cifs_dbg(FYI, "%s: could not resolve %.*s. assuming server address matches.\n", + __func__, (int)hostlen, host); + return true; + } + + if (!cifs_convert_address(&sa, ip, strlen(ip))) { + cifs_dbg(VFS, "%s: failed to convert address \'%s\'. skip address matching.\n", + __func__, ip); + } else { + mutex_lock(&server->srv_mutex); + match = cifs_match_ipaddr((struct sockaddr *)&server->dstaddr, &sa); + mutex_unlock(&server->srv_mutex); + } + + kfree(ip); + return match; +} + +/* + * Mark dfs tcon for reconnecting when the currently connected tcon does not match any of the new + * target shares in @refs. + */ +static void mark_for_reconnect_if_needed(struct cifs_tcon *tcon, struct dfs_cache_tgt_list *tl, + const struct dfs_info3_param *refs, int numrefs) +{ + struct dfs_cache_tgt_iterator *it; + int i; + + for (it = dfs_cache_get_tgt_iterator(tl); it; it = dfs_cache_get_next_tgt(tl, it)) { + for (i = 0; i < numrefs; i++) { + if (target_share_equal(tcon->ses->server, dfs_cache_get_tgt_name(it), + refs[i].node_name)) + return; + } + } + + cifs_dbg(FYI, "%s: no cached or matched targets. mark dfs share for reconnect.\n", __func__); + for (i = 0; i < tcon->ses->chan_count; i++) { + spin_lock(&GlobalMid_Lock); + if (tcon->ses->chans[i].server->tcpStatus != CifsExiting) + tcon->ses->chans[i].server->tcpStatus = CifsNeedReconnect; + spin_unlock(&GlobalMid_Lock); + } +} + +/* Refresh dfs referral of tcon and mark it for reconnect if needed */ +static int refresh_tcon(struct cifs_ses **sessions, struct cifs_tcon *tcon, bool force_refresh) +{ + const char *path = tcon->dfs_path + 1; + struct cifs_ses *ses; + struct cache_entry *ce; + struct dfs_info3_param *refs = NULL; + int numrefs = 0; + bool needs_refresh = false; + struct dfs_cache_tgt_list tl = DFS_CACHE_TGT_LIST_INIT(tl); + int rc = 0; + unsigned int xid; + + ses = find_ipc_from_server_path(sessions, path); + if (IS_ERR(ses)) { + cifs_dbg(FYI, "%s: could not find ipc session\n", __func__); + return PTR_ERR(ses); + } + + down_read(&htable_rw_lock); + ce = lookup_cache_entry(path); + needs_refresh = force_refresh || IS_ERR(ce) || cache_entry_expired(ce); + if (!IS_ERR(ce)) { + rc = get_targets(ce, &tl); + if (rc) + cifs_dbg(FYI, "%s: could not get dfs targets: %d\n", __func__, rc); + } + up_read(&htable_rw_lock); + + if (!needs_refresh) { + rc = 0; + goto out; + } + + xid = get_xid(); + rc = get_dfs_referral(xid, ses, path, &refs, &numrefs); + free_xid(xid); + + /* Create or update a cache entry with the new referral */ + if (!rc) { + dump_refs(refs, numrefs); + + down_write(&htable_rw_lock); + ce = lookup_cache_entry(path); + if (IS_ERR(ce)) + add_cache_entry_locked(refs, numrefs); + else if (force_refresh || cache_entry_expired(ce)) + update_cache_entry_locked(ce, refs, numrefs); + up_write(&htable_rw_lock); + + mark_for_reconnect_if_needed(tcon, &tl, refs, numrefs); + } + +out: + dfs_cache_free_tgts(&tl); + free_dfs_info_array(refs, numrefs); + return rc; +} + +/** + * dfs_cache_remount_fs - remount a DFS share + * + * Reconfigure dfs mount by forcing a new DFS referral and if the currently cached targets do not + * match any of the new targets, mark it for reconnect. + * + * @cifs_sb: cifs superblock. + * + * Return zero if remounted, otherwise non-zero. + */ +int dfs_cache_remount_fs(struct cifs_sb_info *cifs_sb) +{ + struct cifs_tcon *tcon; + struct mount_group *mg; + struct cifs_ses *sessions[CACHE_MAX_ENTRIES + 1] = {NULL}; + int rc; + + if (!cifs_sb || !cifs_sb->master_tlink) + return -EINVAL; + + tcon = cifs_sb_master_tcon(cifs_sb); + if (!tcon->dfs_path) { + cifs_dbg(FYI, "%s: not a dfs tcon\n", __func__); + return 0; + } + + if (uuid_is_null(&cifs_sb->dfs_mount_id)) { + cifs_dbg(FYI, "%s: tcon has no dfs mount group id\n", __func__); + return -EINVAL; + } + + mutex_lock(&mount_group_list_lock); + mg = find_mount_group_locked(&cifs_sb->dfs_mount_id); + if (IS_ERR(mg)) { + mutex_unlock(&mount_group_list_lock); + cifs_dbg(FYI, "%s: tcon has ipc session to refresh referral\n", __func__); + return PTR_ERR(mg); + } + kref_get(&mg->refcount); + mutex_unlock(&mount_group_list_lock); + + spin_lock(&mg->lock); + memcpy(&sessions, mg->sessions, mg->num_sessions * sizeof(mg->sessions[0])); + spin_unlock(&mg->lock); + + /* + * After reconnecting to a different server, unique ids won't match anymore, so we disable + * serverino. This prevents dentry revalidation to think the dentry are stale (ESTALE). + */ + cifs_autodisable_serverino(cifs_sb); + /* + * Force the use of prefix path to support failover on DFS paths that resolve to targets + * that have different prefix paths. + */ + cifs_sb->mnt_cifs_flags |= CIFS_MOUNT_USE_PREFIX_PATH; + rc = refresh_tcon(sessions, tcon, true); + + kref_put(&mg->refcount, mount_group_release); + return rc; +} + /* * Refresh all active dfs mounts regardless of whether they are in cache or not. * (cache can be cleared) @@ -1303,7 +1493,6 @@ static void refresh_mounts(struct cifs_ses **sessions) struct cifs_ses *ses; struct cifs_tcon *tcon, *ntcon; struct list_head tcons; - unsigned int xid; INIT_LIST_HEAD(&tcons); @@ -1321,44 +1510,8 @@ static void refresh_mounts(struct cifs_ses **sessions) spin_unlock(&cifs_tcp_ses_lock); list_for_each_entry_safe(tcon, ntcon, &tcons, ulist) { - const char *path = tcon->dfs_path + 1; - struct cache_entry *ce; - struct dfs_info3_param *refs = NULL; - int numrefs = 0; - bool needs_refresh = false; - int rc = 0; - list_del_init(&tcon->ulist); - - ses = find_ipc_from_server_path(sessions, path); - if (IS_ERR(ses)) - goto next_tcon; - - down_read(&htable_rw_lock); - ce = lookup_cache_entry(path); - needs_refresh = IS_ERR(ce) || cache_entry_expired(ce); - up_read(&htable_rw_lock); - - if (!needs_refresh) - goto next_tcon; - - xid = get_xid(); - rc = get_dfs_referral(xid, ses, path, &refs, &numrefs); - free_xid(xid); - - /* Create or update a cache entry with the new referral */ - if (!rc) { - down_write(&htable_rw_lock); - ce = lookup_cache_entry(path); - if (IS_ERR(ce)) - add_cache_entry_locked(refs, numrefs); - else if (cache_entry_expired(ce)) - update_cache_entry_locked(ce, refs, numrefs); - up_write(&htable_rw_lock); - } - -next_tcon: - free_dfs_info_array(refs, numrefs); + refresh_tcon(sessions, tcon, false); cifs_put_tcon(tcon); } } diff --git a/fs/cifs/dfs_cache.h b/fs/cifs/dfs_cache.h index b29d3ae64829a4..52070d1df18975 100644 --- a/fs/cifs/dfs_cache.h +++ b/fs/cifs/dfs_cache.h @@ -13,6 +13,8 @@ #include #include "cifsglob.h" +#define DFS_CACHE_TGT_LIST_INIT(var) { .tl_numtgts = 0, .tl_list = LIST_HEAD_INIT((var).tl_list), } + struct dfs_cache_tgt_list { int tl_numtgts; struct list_head tl_list; @@ -44,6 +46,7 @@ int dfs_cache_get_tgt_share(char *path, const struct dfs_cache_tgt_iterator *it, void dfs_cache_put_refsrv_sessions(const uuid_t *mount_id); void dfs_cache_add_refsrv_session(const uuid_t *mount_id, struct cifs_ses *ses); char *dfs_cache_canonical_path(const char *path, const struct nls_table *cp, int remap); +int dfs_cache_remount_fs(struct cifs_sb_info *cifs_sb); static inline struct dfs_cache_tgt_iterator * dfs_cache_get_next_tgt(struct dfs_cache_tgt_list *tl, diff --git a/fs/cifs/fs_context.c b/fs/cifs/fs_context.c index 553adfbcc22a60..9a59d7ff9a11bf 100644 --- a/fs/cifs/fs_context.c +++ b/fs/cifs/fs_context.c @@ -13,6 +13,9 @@ #include #include #include +#ifdef CONFIG_CIFS_DFS_UPCALL +#include "dfs_cache.h" +#endif */ #include @@ -779,6 +782,10 @@ static int smb3_reconfigure(struct fs_context *fc) smb3_cleanup_fs_context_contents(cifs_sb->ctx); rc = smb3_fs_context_dup(cifs_sb->ctx, ctx); smb3_update_mnt_flags(cifs_sb); +#ifdef CONFIG_CIFS_DFS_UPCALL + if (!rc) + rc = dfs_cache_remount_fs(cifs_sb); +#endif return rc; } From 21a64910997e0c1d268bebf9b1217ba5804d592d Mon Sep 17 00:00:00 2001 From: Steve French Date: Thu, 22 Jul 2021 13:50:41 -0500 Subject: [PATCH 674/714] CIFS: Clarify SMB1 code for POSIX Create Coverity also complains about the way we calculate the offset (starting from the address of a 4 byte array within the header structure rather than from the beginning of the struct plus 4 bytes) for SMB1 CIFSPOSIXCreate. This changeset doesn't change the address but makes it slightly clearer. Addresses-Coverity: 711518 ("Out of bounds write") Signed-off-by: Steve French --- fs/cifs/cifssmb.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/fs/cifs/cifssmb.c b/fs/cifs/cifssmb.c index f72e3b3dca6953..d4144c18260494 100644 --- a/fs/cifs/cifssmb.c +++ b/fs/cifs/cifssmb.c @@ -1081,7 +1081,8 @@ CIFSPOSIXCreate(const unsigned int xid, struct cifs_tcon *tcon, param_offset = offsetof(struct smb_com_transaction2_spi_req, InformationLevel) - 4; offset = param_offset + params; - pdata = (OPEN_PSX_REQ *)(((char *)&pSMB->hdr.Protocol) + offset); + /* SMB offsets are from the beginning of SMB which is 4 bytes in, after RFC1001 field */ + pdata = (OPEN_PSX_REQ *)((char *)(pSMB) + offset + 4); pdata->Level = cpu_to_le16(SMB_QUERY_FILE_UNIX_BASIC); pdata->Permissions = cpu_to_le64(mode); pdata->PosixOpenFlags = cpu_to_le32(posix_flags); From 7b09d4e0be94968b7c6c117e34ca90cea9c6d986 Mon Sep 17 00:00:00 2001 From: Steve French Date: Thu, 22 Jul 2021 14:35:15 -0500 Subject: [PATCH 675/714] CIFS: Clarify SMB1 code for POSIX delete file Coverity also complains about the way we calculate the offset (starting from the address of a 4 byte array within the header structure rather than from the beginning of the struct plus 4 bytes) for SMB1 CIFSPOSIXDelFile. This changeset doesn't change the address but makes it slightly clearer. Addresses-Coverity: 711519 ("Out of bounds write") Signed-off-by: Steve French --- fs/cifs/cifssmb.c | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/fs/cifs/cifssmb.c b/fs/cifs/cifssmb.c index d4144c18260494..65d1a65bfc3710 100644 --- a/fs/cifs/cifssmb.c +++ b/fs/cifs/cifssmb.c @@ -873,8 +873,11 @@ CIFSPOSIXDelFile(const unsigned int xid, struct cifs_tcon *tcon, InformationLevel) - 4; offset = param_offset + params; - /* Setup pointer to Request Data (inode type) */ - pRqD = (struct unlink_psx_rq *)(((char *)&pSMB->hdr.Protocol) + offset); + /* Setup pointer to Request Data (inode type). + * Note that SMB offsets are from the beginning of SMB which is 4 bytes + * in, after RFC1001 field + */ + pRqD = (struct unlink_psx_rq *)((char *)(pSMB) + offset + 4); pRqD->type = cpu_to_le16(type); pSMB->ParameterOffset = cpu_to_le16(param_offset); pSMB->DataOffset = cpu_to_le16(offset); From 0cc936f74bcacb039b7533aeac0a887dfc896bf6 Mon Sep 17 00:00:00 2001 From: Jens Axboe Date: Thu, 22 Jul 2021 17:08:07 -0600 Subject: [PATCH 676/714] io_uring: fix early fdput() of file A previous commit shuffled some code around, and inadvertently used struct file after fdput() had been called on it. As we can't touch the file post fdput() dropping our reference, move the fdput() to after that has been done. Cc: Pavel Begunkov Cc: stable@vger.kernel.org Link: https://lore.kernel.org/io-uring/YPnqM0fY3nM5RdRI@zeniv-ca.linux.org.uk/ Fixes: f2a48dd09b8e ("io_uring: refactor io_sq_offload_create()") Reported-by: Al Viro Signed-off-by: Jens Axboe --- fs/io_uring.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/fs/io_uring.c b/fs/io_uring.c index fe3d948658ad33..f2fe4eca150bfe 100644 --- a/fs/io_uring.c +++ b/fs/io_uring.c @@ -7991,9 +7991,11 @@ static int io_sq_offload_create(struct io_ring_ctx *ctx, f = fdget(p->wq_fd); if (!f.file) return -ENXIO; - fdput(f); - if (f.file->f_op != &io_uring_fops) + if (f.file->f_op != &io_uring_fops) { + fdput(f); return -EINVAL; + } + fdput(f); } if (ctx->flags & IORING_SETUP_SQPOLL) { struct task_struct *tsk; From 488968a8945c119859d91bb6a8dc13bf50002f15 Mon Sep 17 00:00:00 2001 From: Ronnie Sahlberg Date: Fri, 23 Jul 2021 11:21:24 +1000 Subject: [PATCH 677/714] cifs: fix fallocate when trying to allocate a hole. Remove the conditional checking for out_data_len and skipping the fallocate if it is 0. This is wrong will actually change any legitimate the fallocate where the entire region is unallocated into a no-op. Additionally, before allocating the range, if FALLOC_FL_KEEP_SIZE is set then we need to clamp the length of the fallocate region as to not extend the size of the file. Fixes: 966a3cb7c7db ("cifs: improve fallocate emulation") Signed-off-by: Ronnie Sahlberg Signed-off-by: Steve French --- fs/cifs/smb2ops.c | 23 ++++++++++++++++++----- 1 file changed, 18 insertions(+), 5 deletions(-) diff --git a/fs/cifs/smb2ops.c b/fs/cifs/smb2ops.c index 5cefb5972396e2..23d6f4d716498f 100644 --- a/fs/cifs/smb2ops.c +++ b/fs/cifs/smb2ops.c @@ -3667,11 +3667,6 @@ static int smb3_simple_fallocate_range(unsigned int xid, (char **)&out_data, &out_data_len); if (rc) goto out; - /* - * It is already all allocated - */ - if (out_data_len == 0) - goto out; buf = kzalloc(1024 * 1024, GFP_KERNEL); if (buf == NULL) { @@ -3794,6 +3789,24 @@ static long smb3_simple_falloc(struct file *file, struct cifs_tcon *tcon, goto out; } + if (keep_size == true) { + /* + * We can not preallocate pages beyond the end of the file + * in SMB2 + */ + if (off >= i_size_read(inode)) { + rc = 0; + goto out; + } + /* + * For fallocates that are partially beyond the end of file, + * clamp len so we only fallocate up to the end of file. + */ + if (off + len > i_size_read(inode)) { + len = i_size_read(inode) - off; + } + } + if ((keep_size == true) || (i_size_read(inode) >= off + len)) { /* * At this point, we are trying to fallocate an internal From c09dc9e1cd3c205f66b2505d742e819735c0eb6f Mon Sep 17 00:00:00 2001 From: Alexandre Ghiti Date: Tue, 29 Jun 2021 11:13:46 +0200 Subject: [PATCH 678/714] riscv: Fix memory_limit for 64-bit kernel As described in Documentation/riscv/vm-layout.rst, the end of the virtual address space for 64-bit kernel is occupied by the modules/BPF/ kernel mappings so this actually reduces the amount of memory we are able to map and then use in the linear mapping. So make sure this limit is correctly set. Signed-off-by: Alexandre Ghiti Fixes: 2bfc6cd81bd1 ("riscv: Move kernel mapping outside of linear mapping") Signed-off-by: Palmer Dabbelt --- arch/riscv/mm/init.c | 11 +++++++++-- 1 file changed, 9 insertions(+), 2 deletions(-) diff --git a/arch/riscv/mm/init.c b/arch/riscv/mm/init.c index ac48742fa6fc64..e4356d65fdce35 100644 --- a/arch/riscv/mm/init.c +++ b/arch/riscv/mm/init.c @@ -127,10 +127,17 @@ void __init mem_init(void) } /* - * The default maximal physical memory size is -PAGE_OFFSET, - * limit the memory size via mem. + * The default maximal physical memory size is -PAGE_OFFSET for 32-bit kernel, + * whereas for 64-bit kernel, the end of the virtual address space is occupied + * by the modules/BPF/kernel mappings which reduces the available size of the + * linear mapping. + * Limit the memory size via mem. */ +#ifdef CONFIG_64BIT +static phys_addr_t memory_limit = -PAGE_OFFSET - SZ_4G; +#else static phys_addr_t memory_limit = -PAGE_OFFSET; +#endif static int __init early_mem(char *p) { From c99127c452484ac89c75af39c3c865174ce2de99 Mon Sep 17 00:00:00 2001 From: Alexandre Ghiti Date: Tue, 29 Jun 2021 11:13:47 +0200 Subject: [PATCH 679/714] riscv: Make sure the linear mapping does not use the kernel mapping For 64-bit kernel, the end of the address space is occupied by the kernel mapping and currently, the functions to populate the kernel page tables (i.e. create_p*d_mapping) do not override existing mapping so we must make sure the linear mapping does not map memory in the kernel mapping by clipping the memory above the memory limit. Signed-off-by: Alexandre Ghiti Fixes: c9811e379b21 ("riscv: Add mem kernel parameter support") Signed-off-by: Palmer Dabbelt --- arch/riscv/mm/init.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/arch/riscv/mm/init.c b/arch/riscv/mm/init.c index e4356d65fdce35..644a34b0d77de1 100644 --- a/arch/riscv/mm/init.c +++ b/arch/riscv/mm/init.c @@ -717,6 +717,8 @@ static void __init setup_vm_final(void) if (start <= __pa(PAGE_OFFSET) && __pa(PAGE_OFFSET) < end) start = __pa(PAGE_OFFSET); + if (end >= __pa(PAGE_OFFSET) + memory_limit) + end = __pa(PAGE_OFFSET) + memory_limit; map_size = best_map_size(start, end - start); for (pa = start; pa < end; pa += map_size) { From db6b84a368b495cb7e41be9cb9e73d4d0537d027 Mon Sep 17 00:00:00 2001 From: Alexandre Ghiti Date: Tue, 29 Jun 2021 11:13:48 +0200 Subject: [PATCH 680/714] riscv: Make sure the kernel mapping does not overlap with IS_ERR_VALUE The check that is done in setup_bootmem currently only works for 32-bit kernel since the kernel mapping has been moved outside of the linear mapping for 64-bit kernel. So make sure that for 64-bit kernel, the kernel mapping does not overlap with the last 4K of the addressable memory. Signed-off-by: Alexandre Ghiti Fixes: 2bfc6cd81bd1 ("riscv: Move kernel mapping outside of linear mapping") Signed-off-by: Palmer Dabbelt --- arch/riscv/mm/init.c | 18 ++++++++++++++++-- 1 file changed, 16 insertions(+), 2 deletions(-) diff --git a/arch/riscv/mm/init.c b/arch/riscv/mm/init.c index 644a34b0d77de1..a14bf3910eec95 100644 --- a/arch/riscv/mm/init.c +++ b/arch/riscv/mm/init.c @@ -159,7 +159,7 @@ static void __init setup_bootmem(void) { phys_addr_t vmlinux_end = __pa_symbol(&_end); phys_addr_t vmlinux_start = __pa_symbol(&_start); - phys_addr_t max_mapped_addr = __pa(~(ulong)0); + phys_addr_t __maybe_unused max_mapped_addr; phys_addr_t dram_end; #ifdef CONFIG_XIP_KERNEL @@ -183,14 +183,20 @@ static void __init setup_bootmem(void) dram_end = memblock_end_of_DRAM(); +#ifndef CONFIG_64BIT /* * memblock allocator is not aware of the fact that last 4K bytes of * the addressable memory can not be mapped because of IS_ERR_VALUE * macro. Make sure that last 4k bytes are not usable by memblock - * if end of dram is equal to maximum addressable memory. + * if end of dram is equal to maximum addressable memory. For 64-bit + * kernel, this problem can't happen here as the end of the virtual + * address space is occupied by the kernel mapping then this check must + * be done in create_kernel_page_table. */ + max_mapped_addr = __pa(~(ulong)0); if (max_mapped_addr == (dram_end - 1)) memblock_set_current_limit(max_mapped_addr - 4096); +#endif min_low_pfn = PFN_UP(memblock_start_of_DRAM()); max_low_pfn = max_pfn = PFN_DOWN(dram_end); @@ -578,6 +584,14 @@ asmlinkage void __init setup_vm(uintptr_t dtb_pa) BUG_ON((PAGE_OFFSET % PGDIR_SIZE) != 0); BUG_ON((kernel_map.phys_addr % map_size) != 0); +#ifdef CONFIG_64BIT + /* + * The last 4K bytes of the addressable memory can not be mapped because + * of IS_ERR_VALUE macro. + */ + BUG_ON((kernel_map.virt_addr + kernel_map.size) > ADDRESS_SPACE_END - SZ_4K); +#endif + pt_ops.alloc_pte = alloc_pte_early; pt_ops.get_pte_virt = get_pte_virt_early; #ifndef __PAGETABLE_PMD_FOLDED From f62f3c20647ebd5fb6ecb8f0b477b9281c44c10a Mon Sep 17 00:00:00 2001 From: Nicholas Piggin Date: Tue, 20 Jul 2021 20:43:09 +1000 Subject: [PATCH 681/714] KVM: PPC: Book3S: Fix H_RTAS rets buffer overflow The kvmppc_rtas_hcall() sets the host rtas_args.rets pointer based on the rtas_args.nargs that was provided by the guest. That guest nargs value is not range checked, so the guest can cause the host rets pointer to be pointed outside the args array. The individual rtas function handlers check the nargs and nrets values to ensure they are correct, but if they are not, the handlers store a -3 (0xfffffffd) failure indication in rets[0] which corrupts host memory. Fix this by testing up front whether the guest supplied nargs and nret would exceed the array size, and fail the hcall directly without storing a failure indication to rets[0]. Also expand on a comment about why we kill the guest and try not to return errors directly if we have a valid rets[0] pointer. Fixes: 8e591cb72047 ("KVM: PPC: Book3S: Add infrastructure to implement kernel-side RTAS calls") Cc: stable@vger.kernel.org # v3.10+ Reported-by: Alexey Kardashevskiy Signed-off-by: Nicholas Piggin Signed-off-by: Michael Ellerman --- arch/powerpc/kvm/book3s_rtas.c | 25 ++++++++++++++++++++++--- 1 file changed, 22 insertions(+), 3 deletions(-) diff --git a/arch/powerpc/kvm/book3s_rtas.c b/arch/powerpc/kvm/book3s_rtas.c index c5e677508d3b23..0f847f1e5ddd0b 100644 --- a/arch/powerpc/kvm/book3s_rtas.c +++ b/arch/powerpc/kvm/book3s_rtas.c @@ -242,6 +242,17 @@ int kvmppc_rtas_hcall(struct kvm_vcpu *vcpu) * value so we can restore it on the way out. */ orig_rets = args.rets; + if (be32_to_cpu(args.nargs) >= ARRAY_SIZE(args.args)) { + /* + * Don't overflow our args array: ensure there is room for + * at least rets[0] (even if the call specifies 0 nret). + * + * Each handler must then check for the correct nargs and nret + * values, but they may always return failure in rets[0]. + */ + rc = -EINVAL; + goto fail; + } args.rets = &args.args[be32_to_cpu(args.nargs)]; mutex_lock(&vcpu->kvm->arch.rtas_token_lock); @@ -269,9 +280,17 @@ int kvmppc_rtas_hcall(struct kvm_vcpu *vcpu) fail: /* * We only get here if the guest has called RTAS with a bogus - * args pointer. That means we can't get to the args, and so we - * can't fail the RTAS call. So fail right out to userspace, - * which should kill the guest. + * args pointer or nargs/nret values that would overflow the + * array. That means we can't get to the args, and so we can't + * fail the RTAS call. So fail right out to userspace, which + * should kill the guest. + * + * SLOF should actually pass the hcall return value from the + * rtas handler call in r3, so enter_rtas could be modified to + * return a failure indication in r3 and we could return such + * errors to the guest rather than failing to host userspace. + * However old guests that don't test for failure could then + * continue silently after errors, so for now we won't do this. */ return rc; } From d9c57d3ed52a92536f5fa59dc5ccdd58b4875076 Mon Sep 17 00:00:00 2001 From: Nicholas Piggin Date: Thu, 8 Jul 2021 21:26:22 +1000 Subject: [PATCH 682/714] KVM: PPC: Book3S HV Nested: Sanitise H_ENTER_NESTED TM state The H_ENTER_NESTED hypercall is handled by the L0, and it is a request by the L1 to switch the context of the vCPU over to that of its L2 guest, and return with an interrupt indication. The L1 is responsible for switching some registers to guest context, and the L0 switches others (including all the hypervisor privileged state). If the L2 MSR has TM active, then the L1 is responsible for recheckpointing the L2 TM state. Then the L1 exits to L0 via the H_ENTER_NESTED hcall, and the L0 saves the TM state as part of the exit, and then it recheckpoints the TM state as part of the nested entry and finally HRFIDs into the L2 with TM active MSR. Not efficient, but about the simplest approach for something that's horrendously complicated. Problems arise if the L1 exits to the L0 with a TM state which does not match the L2 TM state being requested. For example if the L1 is transactional but the L2 MSR is non-transactional, or vice versa. The L0's HRFID can take a TM Bad Thing interrupt and crash. Fix this by disallowing H_ENTER_NESTED in TM[T] state entirely, and then ensuring that if the L1 is suspended then the L2 must have TM active, and if the L1 is not suspended then the L2 must not have TM active. Fixes: 360cae313702 ("KVM: PPC: Book3S HV: Nested guest entry via hypercall") Cc: stable@vger.kernel.org # v4.20+ Reported-by: Alexey Kardashevskiy Acked-by: Michael Neuling Signed-off-by: Nicholas Piggin Signed-off-by: Michael Ellerman --- arch/powerpc/kvm/book3s_hv_nested.c | 20 ++++++++++++++++++++ 1 file changed, 20 insertions(+) diff --git a/arch/powerpc/kvm/book3s_hv_nested.c b/arch/powerpc/kvm/book3s_hv_nested.c index 8543ad538b0c30..898f942eb19834 100644 --- a/arch/powerpc/kvm/book3s_hv_nested.c +++ b/arch/powerpc/kvm/book3s_hv_nested.c @@ -302,6 +302,9 @@ long kvmhv_enter_nested_guest(struct kvm_vcpu *vcpu) if (vcpu->kvm->arch.l1_ptcr == 0) return H_NOT_AVAILABLE; + if (MSR_TM_TRANSACTIONAL(vcpu->arch.shregs.msr)) + return H_BAD_MODE; + /* copy parameters in */ hv_ptr = kvmppc_get_gpr(vcpu, 4); regs_ptr = kvmppc_get_gpr(vcpu, 5); @@ -322,6 +325,23 @@ long kvmhv_enter_nested_guest(struct kvm_vcpu *vcpu) if (l2_hv.vcpu_token >= NR_CPUS) return H_PARAMETER; + /* + * L1 must have set up a suspended state to enter the L2 in a + * transactional state, and only in that case. These have to be + * filtered out here to prevent causing a TM Bad Thing in the + * host HRFID. We could synthesize a TM Bad Thing back to the L1 + * here but there doesn't seem like much point. + */ + if (MSR_TM_SUSPENDED(vcpu->arch.shregs.msr)) { + if (!MSR_TM_ACTIVE(l2_regs.msr)) + return H_BAD_MODE; + } else { + if (l2_regs.msr & MSR_TS_MASK) + return H_BAD_MODE; + if (WARN_ON_ONCE(vcpu->arch.shregs.msr & MSR_TS_MASK)) + return H_BAD_MODE; + } + /* translate lpid */ l2 = kvmhv_get_nested(vcpu->kvm, l2_hv.lpid, true); if (!l2) From 3b13911a2fd0dd0146c9777a254840c5466cf120 Mon Sep 17 00:00:00 2001 From: "Steven Rostedt (VMware)" Date: Wed, 21 Jul 2021 19:10:08 -0400 Subject: [PATCH 683/714] tracing: Synthetic event field_pos is an index not a boolean Performing the following: ># echo 'wakeup_lat s32 pid; u64 delta; char wake_comm[]' > synthetic_events ># echo 'hist:keys=pid:__arg__1=common_timestamp.usecs' > events/sched/sched_waking/trigger ># echo 'hist:keys=next_pid:pid=next_pid,delta=common_timestamp.usecs-$__arg__1:onmatch(sched.sched_waking).trace(wakeup_lat,$pid,$delta,prev_comm)'\ > events/sched/sched_switch/trigger ># echo 1 > events/synthetic/enable Crashed the kernel: BUG: kernel NULL pointer dereference, address: 000000000000001b #PF: supervisor read access in kernel mode #PF: error_code(0x0000) - not-present page PGD 0 P4D 0 Oops: 0000 [#1] PREEMPT SMP CPU: 7 PID: 0 Comm: swapper/7 Not tainted 5.13.0-rc5-test+ #104 Hardware name: Hewlett-Packard HP Compaq Pro 6300 SFF/339A, BIOS K01 v03.03 07/14/2016 RIP: 0010:strlen+0x0/0x20 Code: f6 82 80 2b 0b bc 20 74 11 0f b6 50 01 48 83 c0 01 f6 82 80 2b 0b bc 20 75 ef c3 66 66 2e 0f 1f 84 00 00 00 00 00 0f 1f 40 00 <80> 3f 00 74 10 48 89 f8 48 83 c0 01 80 38 9 f8 c3 31 RSP: 0018:ffffaa75000d79d0 EFLAGS: 00010046 RAX: 0000000000000002 RBX: ffff9cdb55575270 RCX: 0000000000000000 RDX: ffff9cdb58c7a320 RSI: ffffaa75000d7b40 RDI: 000000000000001b RBP: ffffaa75000d7b40 R08: ffff9cdb40a4f010 R09: ffffaa75000d7ab8 R10: ffff9cdb4398c700 R11: 0000000000000008 R12: ffff9cdb58c7a320 R13: ffff9cdb55575270 R14: ffff9cdb58c7a000 R15: 0000000000000018 FS: 0000000000000000(0000) GS:ffff9cdb5aa00000(0000) knlGS:0000000000000000 CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 CR2: 000000000000001b CR3: 00000000c0612006 CR4: 00000000001706e0 Call Trace: trace_event_raw_event_synth+0x90/0x1d0 action_trace+0x5b/0x70 event_hist_trigger+0x4bd/0x4e0 ? cpumask_next_and+0x20/0x30 ? update_sd_lb_stats.constprop.0+0xf6/0x840 ? __lock_acquire.constprop.0+0x125/0x550 ? find_held_lock+0x32/0x90 ? sched_clock_cpu+0xe/0xd0 ? lock_release+0x155/0x440 ? update_load_avg+0x8c/0x6f0 ? enqueue_entity+0x18a/0x920 ? __rb_reserve_next+0xe5/0x460 ? ring_buffer_lock_reserve+0x12a/0x3f0 event_triggers_call+0x52/0xe0 trace_event_buffer_commit+0x1ae/0x240 trace_event_raw_event_sched_switch+0x114/0x170 __traceiter_sched_switch+0x39/0x50 __schedule+0x431/0xb00 schedule_idle+0x28/0x40 do_idle+0x198/0x2e0 cpu_startup_entry+0x19/0x20 secondary_startup_64_no_verify+0xc2/0xcb The reason is that the dynamic events array keeps track of the field position of the fields array, via the field_pos variable in the synth_field structure. Unfortunately, that field is a boolean for some reason, which means any field_pos greater than 1 will be a bug (in this case it was 2). Link: https://lkml.kernel.org/r/20210721191008.638bce34@oasis.local.home Cc: Masami Hiramatsu Cc: Namhyung Kim Cc: Ingo Molnar Cc: Andrew Morton Cc: stable@vger.kernel.org Fixes: bd82631d7ccdc ("tracing: Add support for dynamic strings to synthetic events") Reviewed-by: Tom Zanussi Signed-off-by: Steven Rostedt (VMware) --- kernel/trace/trace_synth.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/kernel/trace/trace_synth.h b/kernel/trace/trace_synth.h index 6e146b959dcd02..4007fe95cf42c1 100644 --- a/kernel/trace/trace_synth.h +++ b/kernel/trace/trace_synth.h @@ -14,10 +14,10 @@ struct synth_field { char *name; size_t size; unsigned int offset; + unsigned int field_pos; bool is_signed; bool is_string; bool is_dynamic; - bool field_pos; }; struct synth_event { From 1e3bac71c5053c99d438771fc9fa5082ae5d90aa Mon Sep 17 00:00:00 2001 From: "Steven Rostedt (VMware)" Date: Wed, 21 Jul 2021 11:00:53 -0400 Subject: [PATCH 684/714] tracing/histogram: Rename "cpu" to "common_cpu" Currently the histogram logic allows the user to write "cpu" in as an event field, and it will record the CPU that the event happened on. The problem with this is that there's a lot of events that have "cpu" as a real field, and using "cpu" as the CPU it ran on, makes it impossible to run histograms on the "cpu" field of events. For example, if I want to have a histogram on the count of the workqueue_queue_work event on its cpu field, running: ># echo 'hist:keys=cpu' > events/workqueue/workqueue_queue_work/trigger Gives a misleading and wrong result. Change the command to "common_cpu" as no event should have "common_*" fields as that's a reserved name for fields used by all events. And this makes sense here as common_cpu would be a field used by all events. Now we can even do: ># echo 'hist:keys=common_cpu,cpu if cpu < 100' > events/workqueue/workqueue_queue_work/trigger ># cat events/workqueue/workqueue_queue_work/hist # event histogram # # trigger info: hist:keys=common_cpu,cpu:vals=hitcount:sort=hitcount:size=2048 if cpu < 100 [active] # { common_cpu: 0, cpu: 2 } hitcount: 1 { common_cpu: 0, cpu: 4 } hitcount: 1 { common_cpu: 7, cpu: 7 } hitcount: 1 { common_cpu: 0, cpu: 7 } hitcount: 1 { common_cpu: 0, cpu: 1 } hitcount: 1 { common_cpu: 0, cpu: 6 } hitcount: 2 { common_cpu: 0, cpu: 5 } hitcount: 2 { common_cpu: 1, cpu: 1 } hitcount: 4 { common_cpu: 6, cpu: 6 } hitcount: 4 { common_cpu: 5, cpu: 5 } hitcount: 14 { common_cpu: 4, cpu: 4 } hitcount: 26 { common_cpu: 0, cpu: 0 } hitcount: 39 { common_cpu: 2, cpu: 2 } hitcount: 184 Now for backward compatibility, I added a trick. If "cpu" is used, and the field is not found, it will fall back to "common_cpu" and work as it did before. This way, it will still work for old programs that use "cpu" to get the actual CPU, but if the event has a "cpu" as a field, it will get that event's "cpu" field, which is probably what it wants anyway. I updated the tracefs/README to include documentation about both the common_timestamp and the common_cpu. This way, if that text is present in the README, then an application can know that common_cpu is supported over just plain "cpu". Link: https://lkml.kernel.org/r/20210721110053.26b4f641@oasis.local.home Cc: Namhyung Kim Cc: Ingo Molnar Cc: Andrew Morton Cc: stable@vger.kernel.org Fixes: 8b7622bf94a44 ("tracing: Add cpu field for hist triggers") Reviewed-by: Tom Zanussi Reviewed-by: Masami Hiramatsu Signed-off-by: Steven Rostedt (VMware) --- Documentation/trace/histogram.rst | 2 +- kernel/trace/trace.c | 4 ++++ kernel/trace/trace_events_hist.c | 22 ++++++++++++++++------ 3 files changed, 21 insertions(+), 7 deletions(-) diff --git a/Documentation/trace/histogram.rst b/Documentation/trace/histogram.rst index b71e09f745c3da..f99be8062bc828 100644 --- a/Documentation/trace/histogram.rst +++ b/Documentation/trace/histogram.rst @@ -191,7 +191,7 @@ Documentation written by Tom Zanussi with the event, in nanoseconds. May be modified by .usecs to have timestamps interpreted as microseconds. - cpu int the cpu on which the event occurred. + common_cpu int the cpu on which the event occurred. ====================== ==== ======================================= Extended error information diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c index f8b80b5bab712c..c59dd35a6da5c5 100644 --- a/kernel/trace/trace.c +++ b/kernel/trace/trace.c @@ -5609,6 +5609,10 @@ static const char readme_msg[] = "\t [:name=histname1]\n" "\t [:.]\n" "\t [if ]\n\n" + "\t Note, special fields can be used as well:\n" + "\t common_timestamp - to record current timestamp\n" + "\t common_cpu - to record the CPU the event happened on\n" + "\n" "\t When a matching event is hit, an entry is added to a hash\n" "\t table using the key(s) and value(s) named, and the value of a\n" "\t sum called 'hitcount' is incremented. Keys and values\n" diff --git a/kernel/trace/trace_events_hist.c b/kernel/trace/trace_events_hist.c index 16a9dfc9fffc3a..34325f41ebc066 100644 --- a/kernel/trace/trace_events_hist.c +++ b/kernel/trace/trace_events_hist.c @@ -1111,7 +1111,7 @@ static const char *hist_field_name(struct hist_field *field, field->flags & HIST_FIELD_FL_ALIAS) field_name = hist_field_name(field->operands[0], ++level); else if (field->flags & HIST_FIELD_FL_CPU) - field_name = "cpu"; + field_name = "common_cpu"; else if (field->flags & HIST_FIELD_FL_EXPR || field->flags & HIST_FIELD_FL_VAR_REF) { if (field->system) { @@ -1991,14 +1991,24 @@ parse_field(struct hist_trigger_data *hist_data, struct trace_event_file *file, hist_data->enable_timestamps = true; if (*flags & HIST_FIELD_FL_TIMESTAMP_USECS) hist_data->attrs->ts_in_usecs = true; - } else if (strcmp(field_name, "cpu") == 0) + } else if (strcmp(field_name, "common_cpu") == 0) *flags |= HIST_FIELD_FL_CPU; else { field = trace_find_event_field(file->event_call, field_name); if (!field || !field->size) { - hist_err(tr, HIST_ERR_FIELD_NOT_FOUND, errpos(field_name)); - field = ERR_PTR(-EINVAL); - goto out; + /* + * For backward compatibility, if field_name + * was "cpu", then we treat this the same as + * common_cpu. + */ + if (strcmp(field_name, "cpu") == 0) { + *flags |= HIST_FIELD_FL_CPU; + } else { + hist_err(tr, HIST_ERR_FIELD_NOT_FOUND, + errpos(field_name)); + field = ERR_PTR(-EINVAL); + goto out; + } } } out: @@ -5085,7 +5095,7 @@ static void hist_field_print(struct seq_file *m, struct hist_field *hist_field) seq_printf(m, "%s=", hist_field->var.name); if (hist_field->flags & HIST_FIELD_FL_CPU) - seq_puts(m, "cpu"); + seq_puts(m, "common_cpu"); else if (field_name) { if (hist_field->flags & HIST_FIELD_FL_VAR_REF || hist_field->flags & HIST_FIELD_FL_ALIAS) From 9528c19507dc9bc3d6cd96f4611d7cb80c5afcde Mon Sep 17 00:00:00 2001 From: "Steven Rostedt (VMware)" Date: Wed, 21 Jul 2021 19:53:41 -0400 Subject: [PATCH 685/714] tracing: Clean up alloc_synth_event() alloc_synth_event() currently has the following code to initialize the event fields and dynamic_fields: for (i = 0, j = 0; i < n_fields; i++) { event->fields[i] = fields[i]; if (fields[i]->is_dynamic) { event->dynamic_fields[j] = fields[i]; event->dynamic_fields[j]->field_pos = i; event->dynamic_fields[j++] = fields[i]; event->n_dynamic_fields++; } } 1) It would make more sense to have all fields keep track of their field_pos. 2) event->dynmaic_fields[j] is assigned twice for no reason. 3) We can move updating event->n_dynamic_fields outside the loop, and just assign it to j. This combination makes the code much cleaner. Link: https://lkml.kernel.org/r/20210721195341.29bb0f77@oasis.local.home Signed-off-by: Steven Rostedt (VMware) --- kernel/trace/trace_events_synth.c | 8 +++----- 1 file changed, 3 insertions(+), 5 deletions(-) diff --git a/kernel/trace/trace_events_synth.c b/kernel/trace/trace_events_synth.c index 2ac75eb6aa86cb..9315fc03e3030b 100644 --- a/kernel/trace/trace_events_synth.c +++ b/kernel/trace/trace_events_synth.c @@ -893,15 +893,13 @@ static struct synth_event *alloc_synth_event(const char *name, int n_fields, dyn_event_init(&event->devent, &synth_event_ops); for (i = 0, j = 0; i < n_fields; i++) { + fields[i]->field_pos = i; event->fields[i] = fields[i]; - if (fields[i]->is_dynamic) { - event->dynamic_fields[j] = fields[i]; - event->dynamic_fields[j]->field_pos = i; + if (fields[i]->is_dynamic) event->dynamic_fields[j++] = fields[i]; - event->n_dynamic_fields++; - } } + event->n_dynamic_fields = j; event->n_fields = n_fields; out: return event; From 68e83498cb4fad31963b5c76a71e80b824bc316e Mon Sep 17 00:00:00 2001 From: Nicolas Saenz Julienne Date: Wed, 21 Jul 2021 13:47:26 +0200 Subject: [PATCH 686/714] ftrace: Avoid synchronize_rcu_tasks_rude() call when not necessary synchronize_rcu_tasks_rude() triggers IPIs and forces rescheduling on all CPUs. It is a costly operation and, when targeting nohz_full CPUs, very disrupting (hence the name). So avoid calling it when 'old_hash' doesn't need to be freed. Link: https://lkml.kernel.org/r/20210721114726.1545103-1-nsaenzju@redhat.com Signed-off-by: Nicolas Saenz Julienne Signed-off-by: Steven Rostedt (VMware) --- kernel/trace/ftrace.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/kernel/trace/ftrace.c b/kernel/trace/ftrace.c index e6fb3e6e1ffc22..4fbcf560dd0380 100644 --- a/kernel/trace/ftrace.c +++ b/kernel/trace/ftrace.c @@ -5985,7 +5985,8 @@ ftrace_graph_release(struct inode *inode, struct file *file) * infrastructure to do the synchronization, thus we must do it * ourselves. */ - synchronize_rcu_tasks_rude(); + if (old_hash != EMPTY_HASH) + synchronize_rcu_tasks_rude(); free_ftrace_hash(old_hash); } From 3b1a8f457fcf105924c72e99f1191834837c978d Mon Sep 17 00:00:00 2001 From: Colin Ian King Date: Wed, 21 Jul 2021 13:09:15 +0100 Subject: [PATCH 687/714] ftrace: Remove redundant initialization of variable ret The variable ret is being initialized with a value that is never read, it is being updated later on. The assignment is redundant and can be removed. Link: https://lkml.kernel.org/r/20210721120915.122278-1-colin.king@canonical.com Addresses-Coverity: ("Unused value") Signed-off-by: Colin Ian King Signed-off-by: Steven Rostedt (VMware) --- kernel/trace/ftrace.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/kernel/trace/ftrace.c b/kernel/trace/ftrace.c index 4fbcf560dd0380..7b180f61e6d3cc 100644 --- a/kernel/trace/ftrace.c +++ b/kernel/trace/ftrace.c @@ -7545,7 +7545,7 @@ int ftrace_is_dead(void) */ int register_ftrace_function(struct ftrace_ops *ops) { - int ret = -1; + int ret; ftrace_ops_init(ops); From 352384d5c84ebe40fa77098cc234fe173247d8ef Mon Sep 17 00:00:00 2001 From: "Steven Rostedt (VMware)" Date: Thu, 22 Jul 2021 21:52:18 -0400 Subject: [PATCH 688/714] tracepoints: Update static_call before tp_funcs when adding a tracepoint Because of the significant overhead that retpolines pose on indirect calls, the tracepoint code was updated to use the new "static_calls" that can modify the running code to directly call a function instead of using an indirect caller, and this function can be changed at runtime. In the tracepoint code that calls all the registered callbacks that are attached to a tracepoint, the following is done: it_func_ptr = rcu_dereference_raw((&__tracepoint_##name)->funcs); if (it_func_ptr) { __data = (it_func_ptr)->data; static_call(tp_func_##name)(__data, args); } If there's just a single callback, the static_call is updated to just call that callback directly. Once another handler is added, then the static caller is updated to call the iterator, that simply loops over all the funcs in the array and calls each of the callbacks like the old method using indirect calling. The issue was discovered with a race between updating the funcs array and updating the static_call. The funcs array was updated first and then the static_call was updated. This is not an issue as long as the first element in the old array is the same as the first element in the new array. But that assumption is incorrect, because callbacks also have a priority field, and if there's a callback added that has a higher priority than the callback on the old array, then it will become the first callback in the new array. This means that it is possible to call the old callback with the new callback data element, which can cause a kernel panic. static_call = callback1() funcs[] = {callback1,data1}; callback2 has higher priority than callback1 CPU 1 CPU 2 ----- ----- new_funcs = {callback2,data2}, {callback1,data1} rcu_assign_pointer(tp->funcs, new_funcs); /* * Now tp->funcs has the new array * but the static_call still calls callback1 */ it_func_ptr = tp->funcs [ new_funcs ] data = it_func_ptr->data [ data2 ] static_call(callback1, data); /* Now callback1 is called with * callback2's data */ [ KERNEL PANIC ] update_static_call(iterator); To prevent this from happening, always switch the static_call to the iterator before assigning the tp->funcs to the new array. The iterator will always properly match the callback with its data. To trigger this bug: In one terminal: while :; do hackbench 50; done In another terminal echo 1 > /sys/kernel/tracing/events/sched/sched_waking/enable while :; do echo 1 > /sys/kernel/tracing/set_event_pid; sleep 0.5 echo 0 > /sys/kernel/tracing/set_event_pid; sleep 0.5 done And it doesn't take long to crash. This is because the set_event_pid adds a callback to the sched_waking tracepoint with a high priority, which will be called before the sched_waking trace event callback is called. Note, the removal to a single callback updates the array first, before changing the static_call to single callback, which is the proper order as the first element in the array is the same as what the static_call is being changed to. Link: https://lore.kernel.org/io-uring/4ebea8f0-58c9-e571-fd30-0ce4f6f09c70@samba.org/ Cc: stable@vger.kernel.org Fixes: d25e37d89dd2f ("tracepoint: Optimize using static_call()") Reported-by: Stefan Metzmacher tested-by: Stefan Metzmacher Signed-off-by: Steven Rostedt (VMware) --- kernel/tracepoint.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/kernel/tracepoint.c b/kernel/tracepoint.c index 976bf8ce803967..fc32821f8240b7 100644 --- a/kernel/tracepoint.c +++ b/kernel/tracepoint.c @@ -299,8 +299,8 @@ static int tracepoint_add_func(struct tracepoint *tp, * a pointer to it. This array is referenced by __DO_TRACE from * include/linux/tracepoint.h using rcu_dereference_sched(). */ - rcu_assign_pointer(tp->funcs, tp_funcs); tracepoint_update_call(tp, tp_funcs, false); + rcu_assign_pointer(tp->funcs, tp_funcs); static_key_enable(&tp->key); release_probes(old); From 3c30ef0f78cfb36fdb13753794b0384cf7e37cc9 Mon Sep 17 00:00:00 2001 From: Jens Axboe Date: Fri, 23 Jul 2021 11:49:29 -0600 Subject: [PATCH 689/714] io_uring: never attempt iopoll reissue from release path There are two reasons why this shouldn't be done: 1) Ring is exiting, and we're canceling requests anyway. Any request should be canceled anyway. In theory, this could iterate for a number of times if someone else is also driving the target block queue into request starvation, however the likelihood of this happening is miniscule. 2) If the original task decided to pass the ring to another task, then we don't want to be reissuing from this context as it may be an unrelated task or context. No assumptions should be made about the context in which ->release() is run. This can only happen for pure read/write, and we'll get -EFAULT on them anyway. Link: https://lore.kernel.org/io-uring/YPr4OaHv0iv0KTOc@zeniv-ca.linux.org.uk/ Reported-by: Al Viro Signed-off-by: Jens Axboe --- fs/io_uring.c | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/fs/io_uring.c b/fs/io_uring.c index f2fe4eca150bfe..117dc32eb8a8be 100644 --- a/fs/io_uring.c +++ b/fs/io_uring.c @@ -2205,7 +2205,7 @@ static inline bool io_run_task_work(void) * Find and free completed poll iocbs */ static void io_iopoll_complete(struct io_ring_ctx *ctx, unsigned int *nr_events, - struct list_head *done) + struct list_head *done, bool resubmit) { struct req_batch rb; struct io_kiocb *req; @@ -2220,7 +2220,7 @@ static void io_iopoll_complete(struct io_ring_ctx *ctx, unsigned int *nr_events, req = list_first_entry(done, struct io_kiocb, inflight_entry); list_del(&req->inflight_entry); - if (READ_ONCE(req->result) == -EAGAIN && + if (READ_ONCE(req->result) == -EAGAIN && resubmit && !(req->flags & REQ_F_DONT_REISSUE)) { req->iopoll_completed = 0; req_ref_get(req); @@ -2244,7 +2244,7 @@ static void io_iopoll_complete(struct io_ring_ctx *ctx, unsigned int *nr_events, } static int io_do_iopoll(struct io_ring_ctx *ctx, unsigned int *nr_events, - long min) + long min, bool resubmit) { struct io_kiocb *req, *tmp; LIST_HEAD(done); @@ -2287,7 +2287,7 @@ static int io_do_iopoll(struct io_ring_ctx *ctx, unsigned int *nr_events, } if (!list_empty(&done)) - io_iopoll_complete(ctx, nr_events, &done); + io_iopoll_complete(ctx, nr_events, &done, resubmit); return ret; } @@ -2305,7 +2305,7 @@ static void io_iopoll_try_reap_events(struct io_ring_ctx *ctx) while (!list_empty(&ctx->iopoll_list)) { unsigned int nr_events = 0; - io_do_iopoll(ctx, &nr_events, 0); + io_do_iopoll(ctx, &nr_events, 0, false); /* let it sleep and repeat later if can't complete a request */ if (nr_events == 0) @@ -2367,7 +2367,7 @@ static int io_iopoll_check(struct io_ring_ctx *ctx, long min) list_empty(&ctx->iopoll_list)) break; } - ret = io_do_iopoll(ctx, &nr_events, min); + ret = io_do_iopoll(ctx, &nr_events, min, true); } while (!ret && nr_events < min && !need_resched()); out: mutex_unlock(&ctx->uring_lock); @@ -6798,7 +6798,7 @@ static int __io_sq_thread(struct io_ring_ctx *ctx, bool cap_entries) mutex_lock(&ctx->uring_lock); if (!list_empty(&ctx->iopoll_list)) - io_do_iopoll(ctx, &nr_events, 0); + io_do_iopoll(ctx, &nr_events, 0, true); /* * Don't submit if refs are dying, good for io_uring_register(), From 991468dcf198bb87f24da330676724a704912b47 Mon Sep 17 00:00:00 2001 From: Jens Axboe Date: Fri, 23 Jul 2021 11:53:54 -0600 Subject: [PATCH 690/714] io_uring: explicitly catch any illegal async queue attempt Catch an illegal case to queue async from an unrelated task that got the ring fd passed to it. This should not be possible to hit, but better be proactive and catch it explicitly. io-wq is extended to check for early IO_WQ_WORK_CANCEL being set on a work item as well, so it can run the request through the normal cancelation path. Signed-off-by: Jens Axboe --- fs/io-wq.c | 7 ++++++- fs/io_uring.c | 11 +++++++++++ 2 files changed, 17 insertions(+), 1 deletion(-) diff --git a/fs/io-wq.c b/fs/io-wq.c index 843d4a7bcd6e94..cf086b01c6c675 100644 --- a/fs/io-wq.c +++ b/fs/io-wq.c @@ -731,7 +731,12 @@ static void io_wqe_enqueue(struct io_wqe *wqe, struct io_wq_work *work) int work_flags; unsigned long flags; - if (test_bit(IO_WQ_BIT_EXIT, &wqe->wq->state)) { + /* + * If io-wq is exiting for this task, or if the request has explicitly + * been marked as one that should not get executed, cancel it here. + */ + if (test_bit(IO_WQ_BIT_EXIT, &wqe->wq->state) || + (work->flags & IO_WQ_WORK_CANCEL)) { io_run_cancel(work, wqe); return; } diff --git a/fs/io_uring.c b/fs/io_uring.c index 117dc32eb8a8be..5a0fd6bcd3180a 100644 --- a/fs/io_uring.c +++ b/fs/io_uring.c @@ -1294,6 +1294,17 @@ static void io_queue_async_work(struct io_kiocb *req) /* init ->work of the whole link before punting */ io_prep_async_link(req); + + /* + * Not expected to happen, but if we do have a bug where this _can_ + * happen, catch it here and ensure the request is marked as + * canceled. That will make io-wq go through the usual work cancel + * procedure rather than attempt to run this request (or create a new + * worker for it). + */ + if (WARN_ON_ONCE(!same_thread_group(req->task, current))) + req->work.flags |= IO_WQ_WORK_CANCEL; + trace_io_uring_queue_async_work(ctx, io_wq_is_hashed(&req->work), req, &req->work, req->flags); io_wq_enqueue(tctx->io_wq, &req->work); From 76f5dfacfb42b75e5782c017827877cfcee20474 Mon Sep 17 00:00:00 2001 From: Jisheng Zhang Date: Fri, 23 Jul 2021 08:22:26 +0800 Subject: [PATCH 691/714] riscv: stacktrace: pin the task's stack in get_wchan Pin the task's stack before calling walk_stackframe() in get_wchan(). This can fix the panic as reported by Andreas when CONFIG_VMAP_STACK=y: [ 65.609696] Unable to handle kernel paging request at virtual address ffffffd0003bbde8 [ 65.610460] Oops [#1] [ 65.610626] Modules linked in: virtio_blk virtio_mmio rtc_goldfish btrfs blake2b_generic libcrc32c xor raid6_pq sg dm_multipath dm_mod scsi_dh_rdac scsi_dh_emc scsi_dh_alua efivarfs [ 65.611670] CPU: 2 PID: 1 Comm: systemd Not tainted 5.14.0-rc1-1.g34fe32a-default #1 openSUSE Tumbleweed (unreleased) c62f7109153e5a0897ee58ba52393ad99b070fd2 [ 65.612334] Hardware name: riscv-virtio,qemu (DT) [ 65.613008] epc : get_wchan+0x5c/0x88 [ 65.613334] ra : get_wchan+0x42/0x88 [ 65.613625] epc : ffffffff800048a4 ra : ffffffff8000488a sp : ffffffd00021bb90 [ 65.614008] gp : ffffffff817709f8 tp : ffffffe07fe91b80 t0 : 00000000000001f8 [ 65.614411] t1 : 0000000000020000 t2 : 0000000000000000 s0 : ffffffd00021bbd0 [ 65.614818] s1 : ffffffd0003bbdf0 a0 : 0000000000000001 a1 : 0000000000000002 [ 65.615237] a2 : ffffffff81618008 a3 : 0000000000000000 a4 : 0000000000000000 [ 65.615637] a5 : ffffffd0003bc000 a6 : 0000000000000002 a7 : ffffffe27d370000 [ 65.616022] s2 : ffffffd0003bbd90 s3 : ffffffff8071a81e s4 : 0000000000003fff [ 65.616407] s5 : ffffffffffffc000 s6 : 0000000000000000 s7 : ffffffff81618008 [ 65.616845] s8 : 0000000000000001 s9 : 0000000180000040 s10: 0000000000000000 [ 65.617248] s11: 000000000000016b t3 : 000000ff00000000 t4 : 0c6aec92de5e3fd7 [ 65.617672] t5 : fff78f60608fcfff t6 : 0000000000000078 [ 65.618088] status: 0000000000000120 badaddr: ffffffd0003bbde8 cause: 000000000000000d [ 65.618621] [] get_wchan+0x5c/0x88 [ 65.619008] [] do_task_stat+0x7a2/0xa46 [ 65.619325] [] proc_tgid_stat+0xe/0x16 [ 65.619637] [] proc_single_show+0x46/0x96 [ 65.619979] [] seq_read_iter+0x190/0x31e [ 65.620341] [] seq_read+0xc4/0x104 [ 65.620633] [] vfs_read+0x6a/0x112 [ 65.620922] [] ksys_read+0x54/0xbe [ 65.621206] [] sys_read+0xe/0x16 [ 65.621474] [] ret_from_syscall+0x0/0x2 [ 65.622169] ---[ end trace f24856ed2b8789c5 ]--- [ 65.622832] Kernel panic - not syncing: Attempted to kill init! exitcode=0x0000000b Signed-off-by: Jisheng Zhang Signed-off-by: Palmer Dabbelt --- arch/riscv/kernel/stacktrace.c | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/arch/riscv/kernel/stacktrace.c b/arch/riscv/kernel/stacktrace.c index ff467b98c3e336..ac7593607fa666 100644 --- a/arch/riscv/kernel/stacktrace.c +++ b/arch/riscv/kernel/stacktrace.c @@ -132,8 +132,12 @@ unsigned long get_wchan(struct task_struct *task) { unsigned long pc = 0; - if (likely(task && task != current && !task_is_running(task))) + if (likely(task && task != current && !task_is_running(task))) { + if (!try_get_task_stack(task)) + return 0; walk_stackframe(task, NULL, save_wchan, &pc); + put_task_stack(task); + } return pc; } From e71e2ace5721a8b921dca18b045069e7bb411277 Mon Sep 17 00:00:00 2001 From: Peter Collingbourne Date: Fri, 23 Jul 2021 15:50:01 -0700 Subject: [PATCH 692/714] userfaultfd: do not untag user pointers Patch series "userfaultfd: do not untag user pointers", v5. If a user program uses userfaultfd on ranges of heap memory, it may end up passing a tagged pointer to the kernel in the range.start field of the UFFDIO_REGISTER ioctl. This can happen when using an MTE-capable allocator, or on Android if using the Tagged Pointers feature for MTE readiness [1]. When a fault subsequently occurs, the tag is stripped from the fault address returned to the application in the fault.address field of struct uffd_msg. However, from the application's perspective, the tagged address *is* the memory address, so if the application is unaware of memory tags, it may get confused by receiving an address that is, from its point of view, outside of the bounds of the allocation. We observed this behavior in the kselftest for userfaultfd [2] but other applications could have the same problem. Address this by not untagging pointers passed to the userfaultfd ioctls. Instead, let the system call fail. Also change the kselftest to use mmap so that it doesn't encounter this problem. [1] https://source.android.com/devices/tech/debug/tagged-pointers [2] tools/testing/selftests/vm/userfaultfd.c This patch (of 2): Do not untag pointers passed to the userfaultfd ioctls. Instead, let the system call fail. This will provide an early indication of problems with tag-unaware userspace code instead of letting the code get confused later, and is consistent with how we decided to handle brk/mmap/mremap in commit dcde237319e6 ("mm: Avoid creating virtual address aliases in brk()/mmap()/mremap()"), as well as being consistent with the existing tagged address ABI documentation relating to how ioctl arguments are handled. The code change is a revert of commit 7d0325749a6c ("userfaultfd: untag user pointers") plus some fixups to some additional calls to validate_range that have appeared since then. [1] https://source.android.com/devices/tech/debug/tagged-pointers [2] tools/testing/selftests/vm/userfaultfd.c Link: https://lkml.kernel.org/r/20210714195437.118982-1-pcc@google.com Link: https://lkml.kernel.org/r/20210714195437.118982-2-pcc@google.com Link: https://linux-review.googlesource.com/id/I761aa9f0344454c482b83fcfcce547db0a25501b Fixes: 63f0c6037965 ("arm64: Introduce prctl() options to control the tagged user addresses ABI") Signed-off-by: Peter Collingbourne Reviewed-by: Andrey Konovalov Reviewed-by: Catalin Marinas Cc: Alistair Delva Cc: Andrea Arcangeli Cc: Dave Martin Cc: Evgenii Stepanov Cc: Lokesh Gidra Cc: Mitch Phillips Cc: Vincenzo Frascino Cc: Will Deacon Cc: William McVicker Cc: [5.4] Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- Documentation/arm64/tagged-address-abi.rst | 26 +++++++++++++++------- fs/userfaultfd.c | 26 ++++++++++------------ 2 files changed, 30 insertions(+), 22 deletions(-) diff --git a/Documentation/arm64/tagged-address-abi.rst b/Documentation/arm64/tagged-address-abi.rst index 459e6b66ff68c2..0c9120ec58ae62 100644 --- a/Documentation/arm64/tagged-address-abi.rst +++ b/Documentation/arm64/tagged-address-abi.rst @@ -45,14 +45,24 @@ how the user addresses are used by the kernel: 1. User addresses not accessed by the kernel but used for address space management (e.g. ``mprotect()``, ``madvise()``). The use of valid - tagged pointers in this context is allowed with the exception of - ``brk()``, ``mmap()`` and the ``new_address`` argument to - ``mremap()`` as these have the potential to alias with existing - user addresses. - - NOTE: This behaviour changed in v5.6 and so some earlier kernels may - incorrectly accept valid tagged pointers for the ``brk()``, - ``mmap()`` and ``mremap()`` system calls. + tagged pointers in this context is allowed with these exceptions: + + - ``brk()``, ``mmap()`` and the ``new_address`` argument to + ``mremap()`` as these have the potential to alias with existing + user addresses. + + NOTE: This behaviour changed in v5.6 and so some earlier kernels may + incorrectly accept valid tagged pointers for the ``brk()``, + ``mmap()`` and ``mremap()`` system calls. + + - The ``range.start``, ``start`` and ``dst`` arguments to the + ``UFFDIO_*`` ``ioctl()``s used on a file descriptor obtained from + ``userfaultfd()``, as fault addresses subsequently obtained by reading + the file descriptor will be untagged, which may otherwise confuse + tag-unaware programs. + + NOTE: This behaviour changed in v5.14 and so some earlier kernels may + incorrectly accept valid tagged pointers for this system call. 2. User addresses accessed by the kernel (e.g. ``write()``). This ABI relaxation is disabled by default and the application thread needs to diff --git a/fs/userfaultfd.c b/fs/userfaultfd.c index f6e0f0c0d0e577..5c2d806e6ae53f 100644 --- a/fs/userfaultfd.c +++ b/fs/userfaultfd.c @@ -1236,23 +1236,21 @@ static __always_inline void wake_userfault(struct userfaultfd_ctx *ctx, } static __always_inline int validate_range(struct mm_struct *mm, - __u64 *start, __u64 len) + __u64 start, __u64 len) { __u64 task_size = mm->task_size; - *start = untagged_addr(*start); - - if (*start & ~PAGE_MASK) + if (start & ~PAGE_MASK) return -EINVAL; if (len & ~PAGE_MASK) return -EINVAL; if (!len) return -EINVAL; - if (*start < mmap_min_addr) + if (start < mmap_min_addr) return -EINVAL; - if (*start >= task_size) + if (start >= task_size) return -EINVAL; - if (len > task_size - *start) + if (len > task_size - start) return -EINVAL; return 0; } @@ -1316,7 +1314,7 @@ static int userfaultfd_register(struct userfaultfd_ctx *ctx, vm_flags |= VM_UFFD_MINOR; } - ret = validate_range(mm, &uffdio_register.range.start, + ret = validate_range(mm, uffdio_register.range.start, uffdio_register.range.len); if (ret) goto out; @@ -1522,7 +1520,7 @@ static int userfaultfd_unregister(struct userfaultfd_ctx *ctx, if (copy_from_user(&uffdio_unregister, buf, sizeof(uffdio_unregister))) goto out; - ret = validate_range(mm, &uffdio_unregister.start, + ret = validate_range(mm, uffdio_unregister.start, uffdio_unregister.len); if (ret) goto out; @@ -1671,7 +1669,7 @@ static int userfaultfd_wake(struct userfaultfd_ctx *ctx, if (copy_from_user(&uffdio_wake, buf, sizeof(uffdio_wake))) goto out; - ret = validate_range(ctx->mm, &uffdio_wake.start, uffdio_wake.len); + ret = validate_range(ctx->mm, uffdio_wake.start, uffdio_wake.len); if (ret) goto out; @@ -1711,7 +1709,7 @@ static int userfaultfd_copy(struct userfaultfd_ctx *ctx, sizeof(uffdio_copy)-sizeof(__s64))) goto out; - ret = validate_range(ctx->mm, &uffdio_copy.dst, uffdio_copy.len); + ret = validate_range(ctx->mm, uffdio_copy.dst, uffdio_copy.len); if (ret) goto out; /* @@ -1768,7 +1766,7 @@ static int userfaultfd_zeropage(struct userfaultfd_ctx *ctx, sizeof(uffdio_zeropage)-sizeof(__s64))) goto out; - ret = validate_range(ctx->mm, &uffdio_zeropage.range.start, + ret = validate_range(ctx->mm, uffdio_zeropage.range.start, uffdio_zeropage.range.len); if (ret) goto out; @@ -1818,7 +1816,7 @@ static int userfaultfd_writeprotect(struct userfaultfd_ctx *ctx, sizeof(struct uffdio_writeprotect))) return -EFAULT; - ret = validate_range(ctx->mm, &uffdio_wp.range.start, + ret = validate_range(ctx->mm, uffdio_wp.range.start, uffdio_wp.range.len); if (ret) return ret; @@ -1866,7 +1864,7 @@ static int userfaultfd_continue(struct userfaultfd_ctx *ctx, unsigned long arg) sizeof(uffdio_continue) - (sizeof(__s64)))) goto out; - ret = validate_range(ctx->mm, &uffdio_continue.range.start, + ret = validate_range(ctx->mm, uffdio_continue.range.start, uffdio_continue.range.len); if (ret) goto out; From 0db282ba2c12c1515d490d14a1ff696643ab0f1b Mon Sep 17 00:00:00 2001 From: Peter Collingbourne Date: Fri, 23 Jul 2021 15:50:04 -0700 Subject: [PATCH 693/714] selftest: use mmap instead of posix_memalign to allocate memory This test passes pointers obtained from anon_allocate_area to the userfaultfd and mremap APIs. This causes a problem if the system allocator returns tagged pointers because with the tagged address ABI the kernel rejects tagged addresses passed to these APIs, which would end up causing the test to fail. To make this test compatible with such system allocators, stop using the system allocator to allocate memory in anon_allocate_area, and instead just use mmap. Link: https://lkml.kernel.org/r/20210714195437.118982-3-pcc@google.com Link: https://linux-review.googlesource.com/id/Icac91064fcd923f77a83e8e133f8631c5b8fc241 Fixes: c47174fc362a ("userfaultfd: selftest") Co-developed-by: Lokesh Gidra Signed-off-by: Lokesh Gidra Signed-off-by: Peter Collingbourne Reviewed-by: Catalin Marinas Cc: Vincenzo Frascino Cc: Dave Martin Cc: Will Deacon Cc: Andrea Arcangeli Cc: Alistair Delva Cc: William McVicker Cc: Evgenii Stepanov Cc: Mitch Phillips Cc: Andrey Konovalov Cc: [5.4] Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- tools/testing/selftests/vm/userfaultfd.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/tools/testing/selftests/vm/userfaultfd.c b/tools/testing/selftests/vm/userfaultfd.c index e363bdaff59d4e..2ea438e6b8b1f8 100644 --- a/tools/testing/selftests/vm/userfaultfd.c +++ b/tools/testing/selftests/vm/userfaultfd.c @@ -210,8 +210,10 @@ static void anon_release_pages(char *rel_area) static void anon_allocate_area(void **alloc_area) { - if (posix_memalign(alloc_area, page_size, nr_pages * page_size)) - err("posix_memalign() failed"); + *alloc_area = mmap(NULL, nr_pages * page_size, PROT_READ | PROT_WRITE, + MAP_ANONYMOUS | MAP_PRIVATE, -1, 0); + if (*alloc_area == MAP_FAILED) + err("mmap of anonymous memory failed"); } static void noop_alias_mapping(__u64 *start, size_t len, unsigned long offset) From 32ae8a0669392248a92d7545a7363004543f3932 Mon Sep 17 00:00:00 2001 From: Weizhao Ouyang Date: Fri, 23 Jul 2021 15:50:08 -0700 Subject: [PATCH 694/714] kfence: defer kfence_test_init to ensure that kunit debugfs is created kfence_test_init and kunit_init both use the same level late_initcall, which means if kfence_test_init linked ahead of kunit_init, kfence_test_init will get a NULL debugfs_rootdir as parent dentry, then kfence_test_init and kfence_debugfs_init both create a debugfs node named "kfence" under debugfs_mount->mnt_root, and it will throw out "debugfs: Directory 'kfence' with parent '/' already present!" with EEXIST. So kfence_test_init should be deferred. Link: https://lkml.kernel.org/r/20210714113140.2949995-1-o451686892@gmail.com Signed-off-by: Weizhao Ouyang Tested-by: Marco Elver Cc: Alexander Potapenko Cc: Dmitry Vyukov Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- mm/kfence/kfence_test.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/mm/kfence/kfence_test.c b/mm/kfence/kfence_test.c index 7f24b9bcb2ec55..942cbc16ad26ba 100644 --- a/mm/kfence/kfence_test.c +++ b/mm/kfence/kfence_test.c @@ -852,7 +852,7 @@ static void kfence_test_exit(void) tracepoint_synchronize_unregister(); } -late_initcall(kfence_test_init); +late_initcall_sync(kfence_test_init); module_exit(kfence_test_exit); MODULE_LICENSE("GPL v2"); From 235a85cb32bb123854ad31de46fdbf04c1d57cda Mon Sep 17 00:00:00 2001 From: Alexander Potapenko Date: Fri, 23 Jul 2021 15:50:11 -0700 Subject: [PATCH 695/714] kfence: move the size check to the beginning of __kfence_alloc() Check the allocation size before toggling kfence_allocation_gate. This way allocations that can't be served by KFENCE will not result in waiting for another CONFIG_KFENCE_SAMPLE_INTERVAL without allocating anything. Link: https://lkml.kernel.org/r/20210714092222.1890268-1-glider@google.com Signed-off-by: Alexander Potapenko Suggested-by: Marco Elver Reviewed-by: Marco Elver Cc: Dmitry Vyukov Cc: Marco Elver Cc: Greg Kroah-Hartman Cc: [5.12+] Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- mm/kfence/core.c | 10 +++++++--- 1 file changed, 7 insertions(+), 3 deletions(-) diff --git a/mm/kfence/core.c b/mm/kfence/core.c index d7666ace9d2e4a..2623ff401a104a 100644 --- a/mm/kfence/core.c +++ b/mm/kfence/core.c @@ -733,6 +733,13 @@ void kfence_shutdown_cache(struct kmem_cache *s) void *__kfence_alloc(struct kmem_cache *s, size_t size, gfp_t flags) { + /* + * Perform size check before switching kfence_allocation_gate, so that + * we don't disable KFENCE without making an allocation. + */ + if (size > PAGE_SIZE) + return NULL; + /* * allocation_gate only needs to become non-zero, so it doesn't make * sense to continue writing to it and pay the associated contention @@ -757,9 +764,6 @@ void *__kfence_alloc(struct kmem_cache *s, size_t size, gfp_t flags) if (!READ_ONCE(kfence_enabled)) return NULL; - if (size > PAGE_SIZE) - return NULL; - return kfence_guarded_alloc(s, size, flags); } From 236e9f1538523d3d380dda1cc99571d587058f37 Mon Sep 17 00:00:00 2001 From: Alexander Potapenko Date: Fri, 23 Jul 2021 15:50:14 -0700 Subject: [PATCH 696/714] kfence: skip all GFP_ZONEMASK allocations Allocation requests outside ZONE_NORMAL (MOVABLE, HIGHMEM or DMA) cannot be fulfilled by KFENCE, because KFENCE memory pool is located in a zone different from the requested one. Because callers of kmem_cache_alloc() may actually rely on the allocation to reside in the requested zone (e.g. memory allocations done with __GFP_DMA must be DMAable), skip all allocations done with GFP_ZONEMASK and/or respective SLAB flags (SLAB_CACHE_DMA and SLAB_CACHE_DMA32). Link: https://lkml.kernel.org/r/20210714092222.1890268-2-glider@google.com Fixes: 0ce20dd84089 ("mm: add Kernel Electric-Fence infrastructure") Signed-off-by: Alexander Potapenko Reviewed-by: Marco Elver Acked-by: Souptick Joarder Cc: Dmitry Vyukov Cc: Greg Kroah-Hartman Cc: Souptick Joarder Cc: [5.12+] Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- mm/kfence/core.c | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/mm/kfence/core.c b/mm/kfence/core.c index 2623ff401a104a..575c685aa64229 100644 --- a/mm/kfence/core.c +++ b/mm/kfence/core.c @@ -740,6 +740,15 @@ void *__kfence_alloc(struct kmem_cache *s, size_t size, gfp_t flags) if (size > PAGE_SIZE) return NULL; + /* + * Skip allocations from non-default zones, including DMA. We cannot + * guarantee that pages in the KFENCE pool will have the requested + * properties (e.g. reside in DMAable memory). + */ + if ((flags & GFP_ZONEMASK) || + (s->flags & (SLAB_CACHE_DMA | SLAB_CACHE_DMA32))) + return NULL; + /* * allocation_gate only needs to become non-zero, so it doesn't make * sense to continue writing to it and pay the associated contention From 8dad53a11f8d94dceb540a5f8f153484f42be84b Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Fri, 23 Jul 2021 15:50:17 -0700 Subject: [PATCH 697/714] mm: call flush_dcache_page() in memcpy_to_page() and memzero_page() memcpy_to_page and memzero_page can write to arbitrary pages, which could be in the page cache or in high memory, so call flush_kernel_dcache_pages to flush the dcache. This is a problem when using these helpers on dcache challeneged architectures. Right now there are just a few users, chances are no one used the PC floppy driver, the aha1542 driver for an ISA SCSI HBA, and a few advanced and optional btrfs and ext4 features on those platforms yet since the conversion. Link: https://lkml.kernel.org/r/20210713055231.137602-2-hch@lst.de Fixes: bb90d4bc7b6a ("mm/highmem: Lift memcpy_[to|from]_page to core") Fixes: 28961998f858 ("iov_iter: lift memzero_page() to highmem.h") Signed-off-by: Christoph Hellwig Reviewed-by: Ira Weiny Cc: Chaitanya Kulkarni Cc: Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/highmem.h | 2 ++ 1 file changed, 2 insertions(+) diff --git a/include/linux/highmem.h b/include/linux/highmem.h index 8c6e8e996c8754..8e7e50a53a1290 100644 --- a/include/linux/highmem.h +++ b/include/linux/highmem.h @@ -318,6 +318,7 @@ static inline void memcpy_to_page(struct page *page, size_t offset, VM_BUG_ON(offset + len > PAGE_SIZE); memcpy(to + offset, from, len); + flush_dcache_page(page); kunmap_local(to); } @@ -325,6 +326,7 @@ static inline void memzero_page(struct page *page, size_t offset, size_t len) { char *addr = kmap_atomic(page); memset(addr + offset, 0, len); + flush_dcache_page(page); kunmap_atomic(addr); } From d9a42b53bdf7b0329dc09a59fc1b092640b6da19 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Fri, 23 Jul 2021 15:50:20 -0700 Subject: [PATCH 698/714] mm: use kmap_local_page in memzero_page The commit message introducing the global memzero_page explicitly mentions switching to kmap_local_page in the commit log but doesn't actually do that. Link: https://lkml.kernel.org/r/20210713055231.137602-3-hch@lst.de Fixes: 28961998f858 ("iov_iter: lift memzero_page() to highmem.h") Signed-off-by: Christoph Hellwig Reviewed-by: Chaitanya Kulkarni Reviewed-by: Ira Weiny Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/highmem.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/include/linux/highmem.h b/include/linux/highmem.h index 8e7e50a53a1290..d9a606a9fc64ae 100644 --- a/include/linux/highmem.h +++ b/include/linux/highmem.h @@ -324,10 +324,10 @@ static inline void memcpy_to_page(struct page *page, size_t offset, static inline void memzero_page(struct page *page, size_t offset, size_t len) { - char *addr = kmap_atomic(page); + char *addr = kmap_local_page(page); memset(addr + offset, 0, len); flush_dcache_page(page); - kunmap_atomic(addr); + kunmap_local(addr); } #endif /* _LINUX_HIGHMEM_H */ From 69e5d322a2fb86173fde8bad26e8eb38cad1b1e9 Mon Sep 17 00:00:00 2001 From: Sergei Trofimovich Date: Fri, 23 Jul 2021 15:50:23 -0700 Subject: [PATCH 699/714] mm: page_alloc: fix page_poison=1 / INIT_ON_ALLOC_DEFAULT_ON interaction To reproduce the failure we need the following system: - kernel command: page_poison=1 init_on_free=0 init_on_alloc=0 - kernel config: * CONFIG_INIT_ON_ALLOC_DEFAULT_ON=y * CONFIG_INIT_ON_FREE_DEFAULT_ON=y * CONFIG_PAGE_POISONING=y Resulting in: 0000000085629bdd: 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 ................ 0000000022861832: 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 ................ 00000000c597f5b0: 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 ................ CPU: 11 PID: 15195 Comm: bash Kdump: loaded Tainted: G U O 5.13.1-gentoo-x86_64 #1 Hardware name: System manufacturer System Product Name/PRIME Z370-A, BIOS 2801 01/13/2021 Call Trace: dump_stack+0x64/0x7c __kernel_unpoison_pages.cold+0x48/0x84 post_alloc_hook+0x60/0xa0 get_page_from_freelist+0xdb8/0x1000 __alloc_pages+0x163/0x2b0 __get_free_pages+0xc/0x30 pgd_alloc+0x2e/0x1a0 mm_init+0x185/0x270 dup_mm+0x6b/0x4f0 copy_process+0x190d/0x1b10 kernel_clone+0xba/0x3b0 __do_sys_clone+0x8f/0xb0 do_syscall_64+0x68/0x80 entry_SYSCALL_64_after_hwframe+0x44/0xae Before commit 51cba1ebc60d ("init_on_alloc: Optimize static branches") init_on_alloc never enabled static branch by default. It could only be enabed explicitly by init_mem_debugging_and_hardening(). But after commit 51cba1ebc60d, a static branch could already be enabled by default. There was no code to ever disable it. That caused page_poison=1 / init_on_free=1 conflict. This change extends init_mem_debugging_and_hardening() to also disable static branch disabling. Link: https://lkml.kernel.org/r/20210714031935.4094114-1-keescook@chromium.org Link: https://lore.kernel.org/r/20210712215816.1512739-1-slyfox@gentoo.org Fixes: 51cba1ebc60d ("init_on_alloc: Optimize static branches") Signed-off-by: Sergei Trofimovich Signed-off-by: Kees Cook Co-developed-by: Kees Cook Reported-by: Mikhail Morfikov Reported-by: Tested-by: Reviewed-by: David Hildenbrand Cc: Alexander Potapenko Cc: Thomas Gleixner Cc: Vlastimil Babka Cc: Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- mm/page_alloc.c | 29 ++++++++++++++++------------- 1 file changed, 16 insertions(+), 13 deletions(-) diff --git a/mm/page_alloc.c b/mm/page_alloc.c index 3e97e68aef7a89..856b175c15a4fa 100644 --- a/mm/page_alloc.c +++ b/mm/page_alloc.c @@ -840,21 +840,24 @@ void init_mem_debugging_and_hardening(void) } #endif - if (_init_on_alloc_enabled_early) { - if (page_poisoning_requested) - pr_info("mem auto-init: CONFIG_PAGE_POISONING is on, " - "will take precedence over init_on_alloc\n"); - else - static_branch_enable(&init_on_alloc); - } - if (_init_on_free_enabled_early) { - if (page_poisoning_requested) - pr_info("mem auto-init: CONFIG_PAGE_POISONING is on, " - "will take precedence over init_on_free\n"); - else - static_branch_enable(&init_on_free); + if ((_init_on_alloc_enabled_early || _init_on_free_enabled_early) && + page_poisoning_requested) { + pr_info("mem auto-init: CONFIG_PAGE_POISONING is on, " + "will take precedence over init_on_alloc and init_on_free\n"); + _init_on_alloc_enabled_early = false; + _init_on_free_enabled_early = false; } + if (_init_on_alloc_enabled_early) + static_branch_enable(&init_on_alloc); + else + static_branch_disable(&init_on_alloc); + + if (_init_on_free_enabled_early) + static_branch_enable(&init_on_free); + else + static_branch_disable(&init_on_free); + #ifdef CONFIG_DEBUG_PAGEALLOC if (!debug_pagealloc_enabled()) return; From 79e482e9c3ae86e849c701c846592e72baddda5a Mon Sep 17 00:00:00 2001 From: Mike Rapoport Date: Fri, 23 Jul 2021 15:50:26 -0700 Subject: [PATCH 700/714] memblock: make for_each_mem_range() traverse MEMBLOCK_HOTPLUG regions Commit b10d6bca8720 ("arch, drivers: replace for_each_membock() with for_each_mem_range()") didn't take into account that when there is movable_node parameter in the kernel command line, for_each_mem_range() would skip ranges marked with MEMBLOCK_HOTPLUG. The page table setup code in POWER uses for_each_mem_range() to create the linear mapping of the physical memory and since the regions marked as MEMORY_HOTPLUG are skipped, they never make it to the linear map. A later access to the memory in those ranges will fail: BUG: Unable to handle kernel data access on write at 0xc000000400000000 Faulting instruction address: 0xc00000000008a3c0 Oops: Kernel access of bad area, sig: 11 [#1] LE PAGE_SIZE=64K MMU=Radix SMP NR_CPUS=2048 NUMA pSeries Modules linked in: CPU: 0 PID: 53 Comm: kworker/u2:0 Not tainted 5.13.0 #7 NIP: c00000000008a3c0 LR: c0000000003c1ed8 CTR: 0000000000000040 REGS: c000000008a57770 TRAP: 0300 Not tainted (5.13.0) MSR: 8000000002009033 CR: 84222202 XER: 20040000 CFAR: c0000000003c1ed4 DAR: c000000400000000 DSISR: 42000000 IRQMASK: 0 GPR00: c0000000003c1ed8 c000000008a57a10 c0000000019da700 c000000400000000 GPR04: 0000000000000280 0000000000000180 0000000000000400 0000000000000200 GPR08: 0000000000000100 0000000000000080 0000000000000040 0000000000000300 GPR12: 0000000000000380 c000000001bc0000 c0000000001660c8 c000000006337e00 GPR16: 0000000000000000 0000000000000000 0000000000000000 0000000000000000 GPR20: 0000000040000000 0000000020000000 c000000001a81990 c000000008c30000 GPR24: c000000008c20000 c000000001a81998 000fffffffff0000 c000000001a819a0 GPR28: c000000001a81908 c00c000001000000 c000000008c40000 c000000008a64680 NIP clear_user_page+0x50/0x80 LR __handle_mm_fault+0xc88/0x1910 Call Trace: __handle_mm_fault+0xc44/0x1910 (unreliable) handle_mm_fault+0x130/0x2a0 __get_user_pages+0x248/0x610 __get_user_pages_remote+0x12c/0x3e0 get_arg_page+0x54/0xf0 copy_string_kernel+0x11c/0x210 kernel_execve+0x16c/0x220 call_usermodehelper_exec_async+0x1b0/0x2f0 ret_from_kernel_thread+0x5c/0x70 Instruction dump: 79280fa4 79271764 79261f24 794ae8e2 7ca94214 7d683a14 7c893a14 7d893050 7d4903a6 60000000 60000000 60000000 <7c001fec> 7c091fec 7c081fec 7c051fec ---[ end trace 490b8c67e6075e09 ]--- Making for_each_mem_range() include MEMBLOCK_HOTPLUG regions in the traversal fixes this issue. Link: https://bugzilla.redhat.com/show_bug.cgi?id=1976100 Link: https://lkml.kernel.org/r/20210712071132.20902-1-rppt@kernel.org Fixes: b10d6bca8720 ("arch, drivers: replace for_each_membock() with for_each_mem_range()") Signed-off-by: Mike Rapoport Tested-by: Greg Kurz Reviewed-by: David Hildenbrand Cc: [5.10+] Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/memblock.h | 4 ++-- mm/memblock.c | 3 ++- 2 files changed, 4 insertions(+), 3 deletions(-) diff --git a/include/linux/memblock.h b/include/linux/memblock.h index cbf46f56d1053b..4a53c3ca86bdcb 100644 --- a/include/linux/memblock.h +++ b/include/linux/memblock.h @@ -209,7 +209,7 @@ static inline void __next_physmem_range(u64 *idx, struct memblock_type *type, */ #define for_each_mem_range(i, p_start, p_end) \ __for_each_mem_range(i, &memblock.memory, NULL, NUMA_NO_NODE, \ - MEMBLOCK_NONE, p_start, p_end, NULL) + MEMBLOCK_HOTPLUG, p_start, p_end, NULL) /** * for_each_mem_range_rev - reverse iterate through memblock areas from @@ -220,7 +220,7 @@ static inline void __next_physmem_range(u64 *idx, struct memblock_type *type, */ #define for_each_mem_range_rev(i, p_start, p_end) \ __for_each_mem_range_rev(i, &memblock.memory, NULL, NUMA_NO_NODE, \ - MEMBLOCK_NONE, p_start, p_end, NULL) + MEMBLOCK_HOTPLUG, p_start, p_end, NULL) /** * for_each_reserved_mem_range - iterate over all reserved memblock areas diff --git a/mm/memblock.c b/mm/memblock.c index 0041ff62c584e7..de7b553baa5000 100644 --- a/mm/memblock.c +++ b/mm/memblock.c @@ -947,7 +947,8 @@ static bool should_skip_region(struct memblock_type *type, return true; /* skip hotpluggable memory regions if needed */ - if (movable_node_is_enabled() && memblock_is_hotpluggable(m)) + if (movable_node_is_enabled() && memblock_is_hotpluggable(m) && + !(flags & MEMBLOCK_HOTPLUG)) return true; /* if we want mirror memory skip non-mirror memory regions */ From b43a9e76b4cc78cdaa8c809dd31cd452797b7661 Mon Sep 17 00:00:00 2001 From: Roman Gushchin Date: Fri, 23 Jul 2021 15:50:29 -0700 Subject: [PATCH 701/714] writeback, cgroup: remove wb from offline list before releasing refcnt Boyang reported that the commit c22d70a162d3 ("writeback, cgroup: release dying cgwbs by switching attached inodes") causes the kernel to crash while running xfstests generic/256 on ext4 on aarch64 and ppc64le. run fstests generic/256 at 2021-07-12 05:41:40 EXT4-fs (vda3): mounted filesystem with ordered data mode. Opts: . Quota mode: none. Unable to handle kernel NULL pointer dereference at virtual address 0000000000000000 Mem abort info: ESR = 0x96000005 EC = 0x25: DABT (current EL), IL = 32 bits SET = 0, FnV = 0 EA = 0, S1PTW = 0 FSC = 0x05: level 1 translation fault Data abort info: ISV = 0, ISS = 0x00000005 CM = 0, WnR = 0 user pgtable: 64k pages, 48-bit VAs, pgdp=00000000b0502000 [0000000000000000] pgd=0000000000000000, p4d=0000000000000000, pud=0000000000000000 Internal error: Oops: 96000005 [#1] SMP Modules linked in: dm_flakey dm_snapshot dm_bufio dm_zero dm_mod loop tls rpcsec_gss_krb5 auth_rpcgss nfsv4 dns_resolver nfs lockd grace fscache netfs rfkill sunrpc ext4 vfat fat mbcache jbd2 drm fuse xfs libcrc32c crct10dif_ce ghash_ce sha2_ce sha256_arm64 sha1_ce virtio_blk virtio_net net_failover virtio_console failover virtio_mmio aes_neon_bs [last unloaded: scsi_debug] CPU: 0 PID: 408468 Comm: kworker/u8:5 Tainted: G X --------- --- 5.14.0-0.rc1.15.bx.el9.aarch64 #1 Hardware name: QEMU KVM Virtual Machine, BIOS 0.0.0 02/06/2015 Workqueue: events_unbound cleanup_offline_cgwbs_workfn pstate: 004000c5 (nzcv daIF +PAN -UAO -TCO BTYPE=--) pc : cleanup_offline_cgwbs_workfn+0x320/0x394 lr : cleanup_offline_cgwbs_workfn+0xe0/0x394 sp : ffff80001554fd10 x29: ffff80001554fd10 x28: 0000000000000000 x27: 0000000000000001 x26: 0000000000000000 x25: 00000000000000e0 x24: ffffd2a2fbe671a8 x23: ffff80001554fd88 x22: ffffd2a2fbe67198 x21: ffffd2a2fc25a730 x20: ffff210412bc3000 x19: ffff210412bc3280 x18: 0000000000000000 x17: 0000000000000000 x16: 0000000000000000 x15: 0000000000000000 x14: 0000000000000000 x13: 0000000000000030 x12: 0000000000000040 x11: ffff210481572238 x10: ffff21048157223a x9 : ffffd2a2fa276c60 x8 : ffff210484106b60 x7 : 0000000000000000 x6 : 000000000007d18a x5 : ffff210416a86400 x4 : ffff210412bc0280 x3 : 0000000000000000 x2 : ffff80001554fd88 x1 : ffff210412bc0280 x0 : 0000000000000003 Call trace: cleanup_offline_cgwbs_workfn+0x320/0x394 process_one_work+0x1f4/0x4b0 worker_thread+0x184/0x540 kthread+0x114/0x120 ret_from_fork+0x10/0x18 Code: d63f0020 97f99963 17ffffa6 f8588263 (f9400061) ---[ end trace e250fe289272792a ]--- Kernel panic - not syncing: Oops: Fatal exception SMP: stopping secondary CPUs SMP: failed to stop secondary CPUs 0-2 Kernel Offset: 0x52a2e9fa0000 from 0xffff800010000000 PHYS_OFFSET: 0xfff0defca0000000 CPU features: 0x00200251,23200840 Memory Limit: none ---[ end Kernel panic - not syncing: Oops: Fatal exception ]--- The problem happens when cgwb_release_workfn() races with cleanup_offline_cgwbs_workfn(): wb_tryget() in cleanup_offline_cgwbs_workfn() can be called after percpu_ref_exit() is cgwb_release_workfn(), which is basically a use-after-free error. Fix the problem by making removing the writeback structure from the offline list before releasing the percpu reference counter. It will guarantee that cleanup_offline_cgwbs_workfn() will not see and not access writeback structures which are about to be released. Link: https://lkml.kernel.org/r/20210716201039.3762203-1-guro@fb.com Fixes: c22d70a162d3 ("writeback, cgroup: release dying cgwbs by switching attached inodes") Signed-off-by: Roman Gushchin Reported-by: Boyang Xue Suggested-by: Jan Kara Tested-by: Darrick J. Wong Cc: Will Deacon Cc: Dave Chinner Cc: Murphy Zhou Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- mm/backing-dev.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/mm/backing-dev.c b/mm/backing-dev.c index 271f2ca862c82c..f5561ea7d90ad6 100644 --- a/mm/backing-dev.c +++ b/mm/backing-dev.c @@ -398,12 +398,12 @@ static void cgwb_release_workfn(struct work_struct *work) blkcg_unpin_online(blkcg); fprop_local_destroy_percpu(&wb->memcg_completions); - percpu_ref_exit(&wb->refcnt); spin_lock_irq(&cgwb_lock); list_del(&wb->offline_node); spin_unlock_irq(&cgwb_lock); + percpu_ref_exit(&wb->refcnt); wb_exit(wb); WARN_ON_ONCE(!list_empty(&wb->b_attached)); kfree_rcu(wb, rcu); From 593311e85b26ecc6e4d45b6fb81b942b6672df09 Mon Sep 17 00:00:00 2001 From: Roman Gushchin Date: Fri, 23 Jul 2021 15:50:32 -0700 Subject: [PATCH 702/714] writeback, cgroup: do not reparent dax inodes The inode switching code is not suited for dax inodes. An attempt to switch a dax inode to a parent writeback structure (as a part of a writeback cleanup procedure) results in a panic like this: run fstests generic/270 at 2021-07-15 05:54:02 XFS (pmem0p2): EXPERIMENTAL big timestamp feature in use. Use at your own risk! XFS (pmem0p2): DAX enabled. Warning: EXPERIMENTAL, use at your own risk XFS (pmem0p2): EXPERIMENTAL inode btree counters feature in use. Use at your own risk! XFS (pmem0p2): Mounting V5 Filesystem XFS (pmem0p2): Ending clean mount XFS (pmem0p2): Quotacheck needed: Please wait. XFS (pmem0p2): Quotacheck: Done. XFS (pmem0p2): xlog_verify_grant_tail: space > BBTOB(tail_blocks) XFS (pmem0p2): xlog_verify_grant_tail: space > BBTOB(tail_blocks) XFS (pmem0p2): xlog_verify_grant_tail: space > BBTOB(tail_blocks) BUG: unable to handle page fault for address: 0000000005b0f669 #PF: supervisor read access in kernel mode #PF: error_code(0x0000) - not-present page PGD 0 P4D 0 Oops: 0000 [#1] SMP PTI CPU: 13 PID: 10479 Comm: kworker/13:16 Not tainted 5.14.0-rc1-master-8096acd7442e+ #8 Hardware name: HP ProLiant DL360 Gen9/ProLiant DL360 Gen9, BIOS P89 09/13/2016 Workqueue: inode_switch_wbs inode_switch_wbs_work_fn RIP: 0010:inode_do_switch_wbs+0xaf/0x470 Code: 00 30 0f 85 c1 03 00 00 0f 1f 44 00 00 31 d2 48 c7 c6 ff ff ff ff 48 8d 7c 24 08 e8 eb 49 1a 00 48 85 c0 74 4a bb ff ff ff ff <48> 8b 50 08 48 8d 4a ff 83 e2 01 48 0f 45 c1 48 8b 00 a8 08 0f 85 RSP: 0018:ffff9c66691abdc8 EFLAGS: 00010002 RAX: 0000000005b0f661 RBX: 00000000ffffffff RCX: ffff89e6a21382b0 RDX: 0000000000000001 RSI: ffff89e350230248 RDI: ffffffffffffffff RBP: ffff89e681d19400 R08: 0000000000000000 R09: 0000000000000228 R10: ffffffffffffffff R11: ffffffffffffffc0 R12: ffff89e6a2138130 R13: ffff89e316af7400 R14: ffff89e316af6e78 R15: ffff89e6a21382b0 FS: 0000000000000000(0000) GS:ffff89ee5fb40000(0000) knlGS:0000000000000000 CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 CR2: 0000000005b0f669 CR3: 0000000cb2410004 CR4: 00000000001706e0 Call Trace: inode_switch_wbs_work_fn+0xb6/0x2a0 process_one_work+0x1e6/0x380 worker_thread+0x53/0x3d0 kthread+0x10f/0x130 ret_from_fork+0x22/0x30 Modules linked in: xt_CHECKSUM xt_MASQUERADE xt_conntrack ipt_REJECT nf_reject_ipv4 nft_compat nft_chain_nat nf_nat nf_conntrack nf_defrag_ipv6 nf_defrag_ipv4 nft_counter nf_tables nfnetlink bridge stp llc rfkill sunrpc intel_rapl_msr intel_rapl_common sb_edac x86_pkg_temp_thermal intel_powerclamp coretemp kvm_intel ipmi_ssif kvm mgag200 i2c_algo_bit iTCO_wdt irqbypass drm_kms_helper iTCO_vendor_support acpi_ipmi rapl syscopyarea sysfillrect intel_cstate ipmi_si sysimgblt ioatdma dax_pmem_compat fb_sys_fops ipmi_devintf device_dax i2c_i801 pcspkr intel_uncore hpilo nd_pmem cec dax_pmem_core dca i2c_smbus acpi_tad lpc_ich ipmi_msghandler acpi_power_meter drm fuse xfs libcrc32c sd_mod t10_pi crct10dif_pclmul crc32_pclmul crc32c_intel tg3 ghash_clmulni_intel serio_raw hpsa hpwdt scsi_transport_sas wmi dm_mirror dm_region_hash dm_log dm_mod CR2: 0000000005b0f669 ---[ end trace ed2105faff8384f3 ]--- RIP: 0010:inode_do_switch_wbs+0xaf/0x470 Code: 00 30 0f 85 c1 03 00 00 0f 1f 44 00 00 31 d2 48 c7 c6 ff ff ff ff 48 8d 7c 24 08 e8 eb 49 1a 00 48 85 c0 74 4a bb ff ff ff ff <48> 8b 50 08 48 8d 4a ff 83 e2 01 48 0f 45 c1 48 8b 00 a8 08 0f 85 RSP: 0018:ffff9c66691abdc8 EFLAGS: 00010002 RAX: 0000000005b0f661 RBX: 00000000ffffffff RCX: ffff89e6a21382b0 RDX: 0000000000000001 RSI: ffff89e350230248 RDI: ffffffffffffffff RBP: ffff89e681d19400 R08: 0000000000000000 R09: 0000000000000228 R10: ffffffffffffffff R11: ffffffffffffffc0 R12: ffff89e6a2138130 R13: ffff89e316af7400 R14: ffff89e316af6e78 R15: ffff89e6a21382b0 FS: 0000000000000000(0000) GS:ffff89ee5fb40000(0000) knlGS:0000000000000000 CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 CR2: 0000000005b0f669 CR3: 0000000cb2410004 CR4: 00000000001706e0 Kernel panic - not syncing: Fatal exception Kernel Offset: 0x15200000 from 0xffffffff81000000 (relocation range: 0xffffffff80000000-0xffffffffbfffffff) ---[ end Kernel panic - not syncing: Fatal exception ]--- The crash happens on an attempt to iterate over attached pagecache pages and check the dirty flag: a dax inode's xarray contains pfn's instead of generic struct page pointers. This happens for DAX and not for other kinds of non-page entries in the inodes because it's a tagged iteration, and shadow/swap entries are never tagged; only DAX entries get tagged. Fix the problem by bailing out (with the false return value) of inode_prepare_sbs_switch() if a dax inode is passed. [willy@infradead.org: changelog addition] Link: https://lkml.kernel.org/r/20210719171350.3876830-1-guro@fb.com Fixes: c22d70a162d3 ("writeback, cgroup: release dying cgwbs by switching attached inodes") Signed-off-by: Roman Gushchin Reported-by: Murphy Zhou Reported-by: Darrick J. Wong Tested-by: Darrick J. Wong Tested-by: Murphy Zhou Acked-by: Matthew Wilcox (Oracle) Cc: Jan Kara Cc: Dave Chinner Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- fs/fs-writeback.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/fs/fs-writeback.c b/fs/fs-writeback.c index 06d04a74ab6c70..4c33705489825a 100644 --- a/fs/fs-writeback.c +++ b/fs/fs-writeback.c @@ -521,6 +521,9 @@ static bool inode_prepare_wbs_switch(struct inode *inode, */ smp_mb(); + if (IS_DAX(inode)) + return false; + /* while holding I_WB_SWITCH, no one else can update the association */ spin_lock(&inode->i_lock); if (!(inode->i_sb->s_flags & SB_ACTIVE) || From af64237461910f4c7365d367291d1c4f20c18769 Mon Sep 17 00:00:00 2001 From: Mike Rapoport Date: Fri, 23 Jul 2021 15:50:35 -0700 Subject: [PATCH 703/714] mm/secretmem: wire up ->set_page_dirty Make secretmem up to date with the changes done in commit 0af573780b0b ("mm: require ->set_page_dirty to be explicitly wired up") so that unconditional call to this method won't cause crashes. Link: https://lkml.kernel.org/r/20210716063933.31633-1-rppt@kernel.org Fixes: 0af573780b0b ("mm: require ->set_page_dirty to be explicitly wired up") Signed-off-by: Mike Rapoport Reviewed-by: David Hildenbrand Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- mm/secretmem.c | 1 + 1 file changed, 1 insertion(+) diff --git a/mm/secretmem.c b/mm/secretmem.c index f77d25467a14ad..030f02ddc7c1dc 100644 --- a/mm/secretmem.c +++ b/mm/secretmem.c @@ -152,6 +152,7 @@ static void secretmem_freepage(struct page *page) } const struct address_space_operations secretmem_aops = { + .set_page_dirty = __set_page_dirty_no_writeback, .freepage = secretmem_freepage, .migratepage = secretmem_migratepage, .isolate_page = secretmem_isolate_page, From e904c2ccf9b5cb356eec754ffea05c08984f6535 Mon Sep 17 00:00:00 2001 From: Muchun Song Date: Fri, 23 Jul 2021 15:50:38 -0700 Subject: [PATCH 704/714] mm: mmap_lock: fix disabling preemption directly Commit 832b50725373 ("mm: mmap_lock: use local locks instead of disabling preemption") fixed a bug by using local locks. But commit d01079f3d0c0 ("mm/mmap_lock: remove dead code for !CONFIG_TRACING configurations") changed those lines back to the original version. I guess it was introduced by fixing conflicts. Link: https://lkml.kernel.org/r/20210720074228.76342-1-songmuchun@bytedance.com Fixes: d01079f3d0c0 ("mm/mmap_lock: remove dead code for !CONFIG_TRACING configurations") Signed-off-by: Muchun Song Acked-by: Mel Gorman Reviewed-by: Yang Shi Reviewed-by: Pankaj Gupta Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- mm/mmap_lock.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/mm/mmap_lock.c b/mm/mmap_lock.c index f5852a058ce0bc..1854850b4b897f 100644 --- a/mm/mmap_lock.c +++ b/mm/mmap_lock.c @@ -156,14 +156,14 @@ static inline void put_memcg_path_buf(void) #define TRACE_MMAP_LOCK_EVENT(type, mm, ...) \ do { \ const char *memcg_path; \ - preempt_disable(); \ + local_lock(&memcg_paths.lock); \ memcg_path = get_mm_memcg_path(mm); \ trace_mmap_lock_##type(mm, \ memcg_path != NULL ? memcg_path : "", \ ##__VA_ARGS__); \ if (likely(memcg_path != NULL)) \ put_memcg_path_buf(); \ - preempt_enable(); \ + local_unlock(&memcg_paths.lock); \ } while (0) #else /* !CONFIG_MEMCG */ From e4dc3489143f84f7ed30be58b886bb6772f229b9 Mon Sep 17 00:00:00 2001 From: Qi Zheng Date: Fri, 23 Jul 2021 15:50:41 -0700 Subject: [PATCH 705/714] mm: fix the deadlock in finish_fault() Commit 63f3655f9501 ("mm, memcg: fix reclaim deadlock with writeback") fix the following ABBA deadlock by pre-allocating the pte page table without holding the page lock. lock_page(A) SetPageWriteback(A) unlock_page(A) lock_page(B) lock_page(B) pte_alloc_one shrink_page_list wait_on_page_writeback(A) SetPageWriteback(B) unlock_page(B) # flush A, B to clear the writeback Commit f9ce0be71d1f ("mm: Cleanup faultaround and finish_fault() codepaths") reworked the relevant code but ignored this race. This will cause the deadlock above to appear again, so fix it. Link: https://lkml.kernel.org/r/20210721074849.57004-1-zhengqi.arch@bytedance.com Fixes: f9ce0be71d1f ("mm: Cleanup faultaround and finish_fault() codepaths") Signed-off-by: Qi Zheng Acked-by: Kirill A. Shutemov Cc: Thomas Gleixner Cc: Johannes Weiner Cc: Michal Hocko Cc: Vladimir Davydov Cc: Muchun Song Cc: Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- mm/memory.c | 11 ++++++++++- 1 file changed, 10 insertions(+), 1 deletion(-) diff --git a/mm/memory.c b/mm/memory.c index 747a01d495f2c5..25fc46e872142a 100644 --- a/mm/memory.c +++ b/mm/memory.c @@ -4026,8 +4026,17 @@ vm_fault_t finish_fault(struct vm_fault *vmf) return ret; } - if (unlikely(pte_alloc(vma->vm_mm, vmf->pmd))) + if (vmf->prealloc_pte) { + vmf->ptl = pmd_lock(vma->vm_mm, vmf->pmd); + if (likely(pmd_none(*vmf->pmd))) { + mm_inc_nr_ptes(vma->vm_mm); + pmd_populate(vma->vm_mm, vmf->pmd, vmf->prealloc_pte); + vmf->prealloc_pte = NULL; + } + spin_unlock(vmf->ptl); + } else if (unlikely(pte_alloc(vma->vm_mm, vmf->pmd))) { return VM_FAULT_OOM; + } } /* See comment in handle_pte_fault() */ From e0f7e2b2f7e7864238a4eea05cc77ae1be2bf784 Mon Sep 17 00:00:00 2001 From: Mike Kravetz Date: Fri, 23 Jul 2021 15:50:44 -0700 Subject: [PATCH 706/714] hugetlbfs: fix mount mode command line processing In commit 32021982a324 ("hugetlbfs: Convert to fs_context") processing of the mount mode string was changed from match_octal() to fsparam_u32. This changed existing behavior as match_octal does not require octal values to have a '0' prefix, but fsparam_u32 does. Use fsparam_u32oct which provides the same behavior as match_octal. Link: https://lkml.kernel.org/r/20210721183326.102716-1-mike.kravetz@oracle.com Fixes: 32021982a324 ("hugetlbfs: Convert to fs_context") Signed-off-by: Mike Kravetz Reported-by: Dennis Camera Reviewed-by: Matthew Wilcox (Oracle) Cc: David Howells Cc: Al Viro Cc: Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- fs/hugetlbfs/inode.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/hugetlbfs/inode.c b/fs/hugetlbfs/inode.c index 926eeb9bf4ebeb..cdfb1ae78a3f84 100644 --- a/fs/hugetlbfs/inode.c +++ b/fs/hugetlbfs/inode.c @@ -77,7 +77,7 @@ enum hugetlb_param { static const struct fs_parameter_spec hugetlb_fs_parameters[] = { fsparam_u32 ("gid", Opt_gid), fsparam_string("min_size", Opt_min_size), - fsparam_u32 ("mode", Opt_mode), + fsparam_u32oct("mode", Opt_mode), fsparam_string("nr_inodes", Opt_nr_inodes), fsparam_string("pagesize", Opt_pagesize), fsparam_string("size", Opt_size), From 6010d300f9f7e16d1bf327b4730bcd0c0886d9e6 Mon Sep 17 00:00:00 2001 From: Akira Tsukamoto Date: Tue, 20 Jul 2021 17:50:52 +0900 Subject: [PATCH 707/714] riscv: __asm_copy_to-from_user: Fix: overrun copy There were two causes for the overrun memory access. The threshold size was too small. The aligning dst require one SZREG and unrolling word copy requires 8*SZREG, total have to be at least 9*SZREG. Inside the unrolling copy, the subtracting -(8*SZREG-1) would make iteration happening one extra loop. Proper value is -(8*SZREG). Signed-off-by: Akira Tsukamoto Fixes: ca6eaaa210de ("riscv: __asm_copy_to-from_user: Optimize unaligned memory access and pipeline stall") Signed-off-by: Palmer Dabbelt --- arch/riscv/lib/uaccess.S | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/arch/riscv/lib/uaccess.S b/arch/riscv/lib/uaccess.S index bceb0629e440eb..8bbeca89a93f00 100644 --- a/arch/riscv/lib/uaccess.S +++ b/arch/riscv/lib/uaccess.S @@ -35,7 +35,7 @@ ENTRY(__asm_copy_from_user) /* * Use byte copy only if too small. */ - li a3, 8*SZREG /* size must be larger than size in word_copy */ + li a3, 9*SZREG /* size must be larger than size in word_copy */ bltu a2, a3, .Lbyte_copy_tail /* @@ -75,7 +75,7 @@ ENTRY(__asm_copy_from_user) * a3 - a1 & mask:(SZREG-1) * t0 - end of aligned dst */ - addi t0, t0, -(8*SZREG-1) /* not to over run */ + addi t0, t0, -(8*SZREG) /* not to over run */ 2: fixup REG_L a4, 0(a1), 10f fixup REG_L a5, SZREG(a1), 10f @@ -97,7 +97,7 @@ ENTRY(__asm_copy_from_user) addi a1, a1, 8*SZREG bltu a0, t0, 2b - addi t0, t0, 8*SZREG-1 /* revert to original value */ + addi t0, t0, 8*SZREG /* revert to original value */ j .Lbyte_copy_tail .Lshift_copy: From 22b5f16ffeff38938ad7420a2bfa3c281c36fd17 Mon Sep 17 00:00:00 2001 From: Akira Tsukamoto Date: Tue, 20 Jul 2021 17:51:45 +0900 Subject: [PATCH 708/714] riscv: __asm_copy_to-from_user: Fix: fail on RV32 Had a bug when converting bytes to bits when the cpu was rv32. The a3 contains the number of bytes and multiple of 8 would be the bits. The LGREG is holding 2 for RV32 and 3 for RV32, so to achieve multiple of 8 it must always be constant 3. The 2 was mistakenly used for rv32. Signed-off-by: Akira Tsukamoto Fixes: ca6eaaa210de ("riscv: __asm_copy_to-from_user: Optimize unaligned memory access and pipeline stall") Signed-off-by: Palmer Dabbelt --- arch/riscv/lib/uaccess.S | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/riscv/lib/uaccess.S b/arch/riscv/lib/uaccess.S index 8bbeca89a93f00..279876821969b3 100644 --- a/arch/riscv/lib/uaccess.S +++ b/arch/riscv/lib/uaccess.S @@ -125,7 +125,7 @@ ENTRY(__asm_copy_from_user) * t3 - prev shift * t4 - current shift */ - slli t3, a3, LGREG + slli t3, a3, 3 /* converting bytes in a3 to bits */ li a5, SZREG*8 sub t4, a5, t3 From d4b3e0105e3c2411af666a50b1bf2d25656a5e83 Mon Sep 17 00:00:00 2001 From: Akira Tsukamoto Date: Tue, 20 Jul 2021 17:52:36 +0900 Subject: [PATCH 709/714] riscv: __asm_copy_to-from_user: Remove unnecessary size check Clean up: The size of 0 will be evaluated in the next step. Not required here. Signed-off-by: Akira Tsukamoto Fixes: ca6eaaa210de ("riscv: __asm_copy_to-from_user: Optimize unaligned memory access and pipeline stall") Signed-off-by: Palmer Dabbelt --- arch/riscv/lib/uaccess.S | 1 - 1 file changed, 1 deletion(-) diff --git a/arch/riscv/lib/uaccess.S b/arch/riscv/lib/uaccess.S index 279876821969b3..54d497a0316425 100644 --- a/arch/riscv/lib/uaccess.S +++ b/arch/riscv/lib/uaccess.S @@ -30,7 +30,6 @@ ENTRY(__asm_copy_from_user) * t0 - end of uncopied dst */ add t0, a0, a2 - bgtu a0, t0, 5f /* * Use byte copy only if too small. From ea196c548c0ac407afd31d142712b6da8bd00244 Mon Sep 17 00:00:00 2001 From: Akira Tsukamoto Date: Tue, 20 Jul 2021 17:53:23 +0900 Subject: [PATCH 710/714] riscv: __asm_copy_to-from_user: Fix: Typos in comments Fixing typos and grammar mistakes and using more intuitive label name. Signed-off-by: Akira Tsukamoto Fixes: ca6eaaa210de ("riscv: __asm_copy_to-from_user: Optimize unaligned memory access and pipeline stall") Signed-off-by: Palmer Dabbelt --- arch/riscv/lib/uaccess.S | 18 +++++++++--------- 1 file changed, 9 insertions(+), 9 deletions(-) diff --git a/arch/riscv/lib/uaccess.S b/arch/riscv/lib/uaccess.S index 54d497a0316425..63bc691cff91b2 100644 --- a/arch/riscv/lib/uaccess.S +++ b/arch/riscv/lib/uaccess.S @@ -33,19 +33,20 @@ ENTRY(__asm_copy_from_user) /* * Use byte copy only if too small. + * SZREG holds 4 for RV32 and 8 for RV64 */ li a3, 9*SZREG /* size must be larger than size in word_copy */ bltu a2, a3, .Lbyte_copy_tail /* - * Copy first bytes until dst is align to word boundary. + * Copy first bytes until dst is aligned to word boundary. * a0 - start of dst * t1 - start of aligned dst */ addi t1, a0, SZREG-1 andi t1, t1, ~(SZREG-1) /* dst is already aligned, skip */ - beq a0, t1, .Lskip_first_bytes + beq a0, t1, .Lskip_align_dst 1: /* a5 - one byte for copying data */ fixup lb a5, 0(a1), 10f @@ -54,7 +55,7 @@ ENTRY(__asm_copy_from_user) addi a0, a0, 1 /* dst */ bltu a0, t1, 1b /* t1 - start of aligned dst */ -.Lskip_first_bytes: +.Lskip_align_dst: /* * Now dst is aligned. * Use shift-copy if src is misaligned. @@ -71,7 +72,6 @@ ENTRY(__asm_copy_from_user) * * a0 - start of aligned dst * a1 - start of aligned src - * a3 - a1 & mask:(SZREG-1) * t0 - end of aligned dst */ addi t0, t0, -(8*SZREG) /* not to over run */ @@ -106,7 +106,7 @@ ENTRY(__asm_copy_from_user) * For misaligned copy we still perform aligned word copy, but * we need to use the value fetched from the previous iteration and * do some shifts. - * This is safe because reading less than a word size. + * This is safe because reading is less than a word size. * * a0 - start of aligned dst * a1 - start of src @@ -116,7 +116,7 @@ ENTRY(__asm_copy_from_user) */ /* calculating aligned word boundary for dst */ andi t1, t0, ~(SZREG-1) - /* Converting unaligned src to aligned arc */ + /* Converting unaligned src to aligned src */ andi a1, a1, ~(SZREG-1) /* @@ -128,7 +128,7 @@ ENTRY(__asm_copy_from_user) li a5, SZREG*8 sub t4, a5, t3 - /* Load the first word to combine with seceond word */ + /* Load the first word to combine with second word */ fixup REG_L a5, 0(a1), 10f 3: @@ -160,7 +160,7 @@ ENTRY(__asm_copy_from_user) * a1 - start of remaining src * t0 - end of remaining dst */ - bgeu a0, t0, 5f + bgeu a0, t0, .Lout_copy_user /* check if end of copy */ 4: fixup lb a5, 0(a1), 10f addi a1, a1, 1 /* src */ @@ -168,7 +168,7 @@ ENTRY(__asm_copy_from_user) addi a0, a0, 1 /* dst */ bltu a0, t0, 4b /* t0 - end of dst */ -5: +.Lout_copy_user: /* Disable access to user memory */ csrc CSR_STATUS, t6 li a0, 0 From fc68f42aa737dc15e7665a4101d4168aadb8e4c4 Mon Sep 17 00:00:00 2001 From: Linus Torvalds Date: Sat, 24 Jul 2021 15:25:54 -0700 Subject: [PATCH 711/714] ACPI: fix NULL pointer dereference Commit 71f642833284 ("ACPI: utils: Fix reference counting in for_each_acpi_dev_match()") started doing "acpi_dev_put()" on a pointer that was possibly NULL. That fails miserably, because that helper inline function is not set up to handle that case. Just make acpi_dev_put() silently accept a NULL pointer, rather than calling down to put_device() with an invalid offset off that NULL pointer. Link: https://lore.kernel.org/lkml/a607c149-6bf6-0fd0-0e31-100378504da2@kernel.dk/ Reported-and-tested-by: Jens Axboe Tested-by: Daniel Scally Cc: Andy Shevchenko Signed-off-by: Linus Torvalds --- include/acpi/acpi_bus.h | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/include/acpi/acpi_bus.h b/include/acpi/acpi_bus.h index b9d434a936320b..13d93371790ec9 100644 --- a/include/acpi/acpi_bus.h +++ b/include/acpi/acpi_bus.h @@ -720,7 +720,8 @@ static inline struct acpi_device *acpi_dev_get(struct acpi_device *adev) static inline void acpi_dev_put(struct acpi_device *adev) { - put_device(&adev->dev); + if (adev) + put_device(&adev->dev); } struct acpi_device *acpi_bus_get_acpi_device(acpi_handle handle); From a1833a54033e4ca760ad58fa2a6469ad59b3fa1a Mon Sep 17 00:00:00 2001 From: Linus Torvalds Date: Sun, 25 Jul 2021 11:06:37 -0700 Subject: [PATCH 712/714] smpboot: fix duplicate and misplaced inlining directive gcc doesn't care, but clang quite reasonably pointed out that the recent commit e9ba16e68cce ("smpboot: Mark idle_init() as __always_inlined to work around aggressive compiler un-inlining") did some really odd things: kernel/smpboot.c:50:20: warning: duplicate 'inline' declaration specifier [-Wduplicate-decl-specifier] static inline void __always_inline idle_init(unsigned int cpu) ^ which not only has that duplicate inlining specifier, but the new __always_inline was put in the wrong place of the function definition. We put the storage class specifiers (ie things like "static" and "extern") first, and the type information after that. And while the compiler may not care, we put the inline specifier before the types. So it should be just static __always_inline void idle_init(unsigned int cpu) instead. Cc: Ingo Molnar Cc: Thomas Gleixner Signed-off-by: Linus Torvalds --- kernel/smpboot.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/kernel/smpboot.c b/kernel/smpboot.c index 21b7953f824215..cf6acab7853848 100644 --- a/kernel/smpboot.c +++ b/kernel/smpboot.c @@ -47,7 +47,7 @@ void __init idle_thread_set_boot_cpu(void) * * Creates the thread if it does not exist. */ -static inline void __always_inline idle_init(unsigned int cpu) +static __always_inline void idle_init(unsigned int cpu) { struct task_struct *tsk = per_cpu(idle_threads, cpu); From ff1176468d368232b684f75e82563369208bc371 Mon Sep 17 00:00:00 2001 From: Linus Torvalds Date: Sun, 25 Jul 2021 15:35:14 -0700 Subject: [PATCH 713/714] Linux 5.14-rc3 --- Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Makefile b/Makefile index e4f5895badb5eb..6b555f64df0682 100644 --- a/Makefile +++ b/Makefile @@ -2,7 +2,7 @@ VERSION = 5 PATCHLEVEL = 14 SUBLEVEL = 0 -EXTRAVERSION = -rc2 +EXTRAVERSION = -rc3 NAME = Opossums on Parade # *DOCUMENTATION* From ba8fa5d74907a61357e909e5079a124e0190f702 Mon Sep 17 00:00:00 2001 From: Miguel Ojeda Date: Thu, 29 Jul 2021 18:57:46 +0200 Subject: [PATCH 714/714] rust: one more skip flag for `bindgen` Signed-off-by: Miguel Ojeda --- rust/Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/rust/Makefile b/rust/Makefile index 233b8f365482e4..60f97b3d357437 100644 --- a/rust/Makefile +++ b/rust/Makefile @@ -202,7 +202,7 @@ bindgen_skip_c_flags := -mno-fp-ret-in-387 -mpreferred-stack-boundary=% \ -Wno-packed-not-aligned -Wno-format-truncation -Wno-format-overflow \ -Wno-stringop-truncation -Wno-unused-but-set-variable \ -Wno-stringop-overflow -Wno-restrict -Wno-maybe-uninitialized \ - -Werror=designated-init -Wno-zero-length-bounds \ + -Werror=designated-init -Wno-zero-length-bounds -Wimplicit-fallthrough=% \ --param=% --param asan-% # PowerPC