From f35f8414aafcc033fa706cda31062a0aaf933176 Mon Sep 17 00:00:00 2001 From: Tobias Bucher Date: Sat, 21 Dec 2024 11:08:22 +0100 Subject: [PATCH 1/8] Format `build.toml` consistently in platform support docs --- src/doc/rustc/src/platform-support/arm64e-apple-darwin.md | 2 +- src/doc/rustc/src/platform-support/arm64e-apple-ios.md | 2 +- src/doc/rustc/src/platform-support/arm64e-apple-tvos.md | 2 +- src/doc/rustc/src/platform-support/arm64ec-pc-windows-msvc.md | 2 +- .../rustc/src/platform-support/hexagon-unknown-linux-musl.md | 2 +- src/doc/rustc/src/platform-support/unikraft-linux-musl.md | 2 +- src/doc/rustc/src/platform-support/win7-windows-msvc.md | 2 +- 7 files changed, 7 insertions(+), 7 deletions(-) diff --git a/src/doc/rustc/src/platform-support/arm64e-apple-darwin.md b/src/doc/rustc/src/platform-support/arm64e-apple-darwin.md index 4d98b3a60986b..322a07c573970 100644 --- a/src/doc/rustc/src/platform-support/arm64e-apple-darwin.md +++ b/src/doc/rustc/src/platform-support/arm64e-apple-darwin.md @@ -20,7 +20,7 @@ You can build Rust with support for the targets by adding it to the `target` lis ```toml [build] -target = [ "arm64e-apple-darwin" ] +target = ["arm64e-apple-darwin"] ``` ## Building Rust programs diff --git a/src/doc/rustc/src/platform-support/arm64e-apple-ios.md b/src/doc/rustc/src/platform-support/arm64e-apple-ios.md index fc4ec5e373fb0..3d8ba5c282a60 100644 --- a/src/doc/rustc/src/platform-support/arm64e-apple-ios.md +++ b/src/doc/rustc/src/platform-support/arm64e-apple-ios.md @@ -18,7 +18,7 @@ You can build Rust with support for the targets by adding it to the `target` lis ```toml [build] -target = [ "arm64e-apple-ios" ] +target = ["arm64e-apple-ios"] ``` ## Building Rust programs diff --git a/src/doc/rustc/src/platform-support/arm64e-apple-tvos.md b/src/doc/rustc/src/platform-support/arm64e-apple-tvos.md index 47234809e5f19..ec8a996549e11 100644 --- a/src/doc/rustc/src/platform-support/arm64e-apple-tvos.md +++ 
b/src/doc/rustc/src/platform-support/arm64e-apple-tvos.md @@ -19,7 +19,7 @@ You can build Rust with support for the targets by adding it to the `target` lis ```toml [build] -target = [ "arm64e-apple-tvos" ] +target = ["arm64e-apple-tvos"] ``` ## Building Rust programs diff --git a/src/doc/rustc/src/platform-support/arm64ec-pc-windows-msvc.md b/src/doc/rustc/src/platform-support/arm64ec-pc-windows-msvc.md index dcabd21a83ebd..15bf55d35a26b 100644 --- a/src/doc/rustc/src/platform-support/arm64ec-pc-windows-msvc.md +++ b/src/doc/rustc/src/platform-support/arm64ec-pc-windows-msvc.md @@ -60,7 +60,7 @@ list in `config.toml`: ```toml [build] -target = [ "arm64ec-pc-windows-msvc" ] +target = ["arm64ec-pc-windows-msvc"] ``` ## Building Rust programs diff --git a/src/doc/rustc/src/platform-support/hexagon-unknown-linux-musl.md b/src/doc/rustc/src/platform-support/hexagon-unknown-linux-musl.md index c1372726a35f4..d858337a9499f 100644 --- a/src/doc/rustc/src/platform-support/hexagon-unknown-linux-musl.md +++ b/src/doc/rustc/src/platform-support/hexagon-unknown-linux-musl.md @@ -48,7 +48,7 @@ target list in `config.toml`, a sample configuration is shown below. 
```toml [build] -target = [ "hexagon-unknown-linux-musl"] +target = ["hexagon-unknown-linux-musl"] [target.hexagon-unknown-linux-musl] diff --git a/src/doc/rustc/src/platform-support/unikraft-linux-musl.md b/src/doc/rustc/src/platform-support/unikraft-linux-musl.md index 90fa18b985783..c589208c099b9 100644 --- a/src/doc/rustc/src/platform-support/unikraft-linux-musl.md +++ b/src/doc/rustc/src/platform-support/unikraft-linux-musl.md @@ -39,7 +39,7 @@ You can build Rust with support for the targets by adding it to the `target` lis ```toml [build] build-stage = 1 -target = [ "x86_64-unikraft-linux-musl" ] +target = ["x86_64-unikraft-linux-musl"] ``` ## Building Rust programs diff --git a/src/doc/rustc/src/platform-support/win7-windows-msvc.md b/src/doc/rustc/src/platform-support/win7-windows-msvc.md index 96613fb9be4cc..45b00a2be8294 100644 --- a/src/doc/rustc/src/platform-support/win7-windows-msvc.md +++ b/src/doc/rustc/src/platform-support/win7-windows-msvc.md @@ -25,7 +25,7 @@ You can build Rust with support for the targets by adding it to the target list ```toml [build] build-stage = 1 -target = [ "x86_64-win7-windows-msvc" ] +target = ["x86_64-win7-windows-msvc"] ``` ## Building Rust programs From ea75d051c633fe04820e4122255d7bb7b6c47598 Mon Sep 17 00:00:00 2001 From: Tobias Bucher Date: Sat, 21 Dec 2024 11:08:42 +0100 Subject: [PATCH 2/8] Fix compiler team name in target tier docs --- src/doc/rustc/src/target-tier-policy.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/doc/rustc/src/target-tier-policy.md b/src/doc/rustc/src/target-tier-policy.md index e9cf2a0d1ae5b..bdcf2c0b07efc 100644 --- a/src/doc/rustc/src/target-tier-policy.md +++ b/src/doc/rustc/src/target-tier-policy.md @@ -119,7 +119,7 @@ To propose addition of a new target, open a pull request on [`rust-lang/rust`]: Link to the created description page. - Ensure the pull request is assigned to a member of the [Rust compiler team][rust_compiler_team] by commenting: ```text - r? 
compiler-team + r? compiler ``` [tier3example]: https://github.com/rust-lang/rust/pull/94872 From 7d5ff8b8bd5b1bf4721fd9a3e34b2e90e670be09 Mon Sep 17 00:00:00 2001 From: Chris Denton Date: Mon, 30 Dec 2024 11:25:00 +0000 Subject: [PATCH 3/8] Windows: Enable issue 70093 link tests --- src/tools/tidy/src/issues.txt | 2 -- .../issue-70093/issue-70093.rs | 10 ---------- ...-link-directives.rs => link-directives.rs} | 1 - .../issue-70093/link-native-libraries.rs | 20 +++++++++++++++++++ 4 files changed, 20 insertions(+), 13 deletions(-) delete mode 100644 tests/ui/link-native-libs/issue-70093/issue-70093.rs rename tests/ui/link-native-libs/issue-70093/{issue-70093-link-directives.rs => link-directives.rs} (82%) create mode 100644 tests/ui/link-native-libs/issue-70093/link-native-libraries.rs diff --git a/src/tools/tidy/src/issues.txt b/src/tools/tidy/src/issues.txt index 25cd32063aab8..54de2ef83148f 100644 --- a/src/tools/tidy/src/issues.txt +++ b/src/tools/tidy/src/issues.txt @@ -2710,8 +2710,6 @@ ui/limits/issue-75158-64.rs ui/link-native-libs/issue-109144.rs ui/link-native-libs/issue-43925.rs ui/link-native-libs/issue-43926.rs -ui/link-native-libs/issue-70093/issue-70093-link-directives.rs -ui/link-native-libs/issue-70093/issue-70093.rs ui/linkage-attr/auxiliary/issue-12133-dylib.rs ui/linkage-attr/auxiliary/issue-12133-dylib2.rs ui/linkage-attr/auxiliary/issue-12133-rlib.rs diff --git a/tests/ui/link-native-libs/issue-70093/issue-70093.rs b/tests/ui/link-native-libs/issue-70093/issue-70093.rs deleted file mode 100644 index 8697423933893..0000000000000 --- a/tests/ui/link-native-libs/issue-70093/issue-70093.rs +++ /dev/null @@ -1,10 +0,0 @@ -//@ run-pass -//@ compile-flags: -Zlink-native-libraries=no -Cdefault-linker-libraries=yes -//@ ignore-windows - this will probably only work on unixish systems -//@ ignore-fuchsia - missing __libc_start_main for some reason (#84733) -//@ ignore-cross-compile - default-linker-libraries=yes doesn't play well with cross compiling - 
-#[link(name = "some-random-non-existent-library", kind = "static")] -extern "C" {} - -fn main() {} diff --git a/tests/ui/link-native-libs/issue-70093/issue-70093-link-directives.rs b/tests/ui/link-native-libs/issue-70093/link-directives.rs similarity index 82% rename from tests/ui/link-native-libs/issue-70093/issue-70093-link-directives.rs rename to tests/ui/link-native-libs/issue-70093/link-directives.rs index 9c60affbccd59..c67536d39e7f1 100644 --- a/tests/ui/link-native-libs/issue-70093/issue-70093-link-directives.rs +++ b/tests/ui/link-native-libs/issue-70093/link-directives.rs @@ -1,6 +1,5 @@ //@ run-pass //@ compile-flags: -Zlink-directives=no -//@ ignore-windows - this will probably only work on unixish systems //@ ignore-fuchsia - missing __libc_start_main for some reason (#84733) //@ ignore-cross-compile - default-linker-libraries=yes doesn't play well with cross compiling diff --git a/tests/ui/link-native-libs/issue-70093/link-native-libraries.rs b/tests/ui/link-native-libs/issue-70093/link-native-libraries.rs new file mode 100644 index 0000000000000..3e14e33ba3940 --- /dev/null +++ b/tests/ui/link-native-libs/issue-70093/link-native-libraries.rs @@ -0,0 +1,20 @@ +//@ run-pass +//@ compile-flags: -Zlink-native-libraries=no -Cdefault-linker-libraries=yes +//@ ignore-fuchsia - missing __libc_start_main for some reason (#84733) +//@ ignore-cross-compile - default-linker-libraries=yes doesn't play well with cross compiling + +//@ revisions: other +//@[other] ignore-msvc + +//@ revisions: msvc +// On Windows MSVC, default-linker-libraries=yes doesn't work because +// rustc drives the linker directly instead of going through another compiler. +// Therefore rustc would need to implement default-linker-libraries itself but doesn't. +// So instead we use -Clink-arg to directly set the required msvcrt.lib as a link arg. 
+//@[msvc] compile-flags: -Clink-arg=msvcrt.lib +//@[msvc] only-msvc + +#[link(name = "some-random-non-existent-library", kind = "static")] +extern "C" {} + +fn main() {} From dc1f2be449d5452f2f89855b7476b716bed7f671 Mon Sep 17 00:00:00 2001 From: Chris Denton Date: Tue, 31 Dec 2024 02:25:35 +0000 Subject: [PATCH 4/8] Add comments to -Zlink-* tests --- tests/ui/link-native-libs/issue-70093/link-directives.rs | 5 +++++ .../link-native-libs/issue-70093/link-native-libraries.rs | 6 ++++++ 2 files changed, 11 insertions(+) diff --git a/tests/ui/link-native-libs/issue-70093/link-directives.rs b/tests/ui/link-native-libs/issue-70093/link-directives.rs index c67536d39e7f1..1dc44c996fcd9 100644 --- a/tests/ui/link-native-libs/issue-70093/link-directives.rs +++ b/tests/ui/link-native-libs/issue-70093/link-directives.rs @@ -1,8 +1,13 @@ +// Ensure that `#[link]` attributes are entirely ignored when using `-Zlink-directives=no`. + //@ run-pass //@ compile-flags: -Zlink-directives=no //@ ignore-fuchsia - missing __libc_start_main for some reason (#84733) //@ ignore-cross-compile - default-linker-libraries=yes doesn't play well with cross compiling +// Usually this `#[link]` attribute would cause `libsome-random-non-existent-library` +// to be passed to the linker, causing it to fail because the file doesn't exist. +// However, with -Zlink-directives=no, the `#[link]` is ignored. #[link(name = "some-random-non-existent-library", kind = "static")] extern "C" {} diff --git a/tests/ui/link-native-libs/issue-70093/link-native-libraries.rs b/tests/ui/link-native-libs/issue-70093/link-native-libraries.rs index 3e14e33ba3940..b4dc9fb5cde17 100644 --- a/tests/ui/link-native-libs/issue-70093/link-native-libraries.rs +++ b/tests/ui/link-native-libs/issue-70093/link-native-libraries.rs @@ -1,3 +1,6 @@ +// Ensure that rust does not pass native libraries to the linker when +// `-Zlink-native-libraries=no` is used. 
+ //@ run-pass //@ compile-flags: -Zlink-native-libraries=no -Cdefault-linker-libraries=yes //@ ignore-fuchsia - missing __libc_start_main for some reason (#84733) @@ -14,6 +17,9 @@ //@[msvc] compile-flags: -Clink-arg=msvcrt.lib //@[msvc] only-msvc +// Usually this `#[link]` attribute would cause `libsome-random-non-existent-library` +// to be passed to the linker, causing it to fail because the file doesn't exist. +// However, -Zlink-native-libraries=no disables that. #[link(name = "some-random-non-existent-library", kind = "static")] extern "C" {} From d9ef419c90fff3797e8069979272c51c547119bb Mon Sep 17 00:00:00 2001 From: LemonJ <1632798336@qq.com> Date: Tue, 31 Dec 2024 10:59:13 +0800 Subject: [PATCH 5/8] fix doc for read write unaligned in zst operation --- library/core/src/ptr/mod.rs | 4 ---- 1 file changed, 4 deletions(-) diff --git a/library/core/src/ptr/mod.rs b/library/core/src/ptr/mod.rs index ac074c097d94c..2f6d42d946ce6 100644 --- a/library/core/src/ptr/mod.rs +++ b/library/core/src/ptr/mod.rs @@ -1403,8 +1403,6 @@ pub const unsafe fn read(src: *const T) -> T { /// whether `T` is [`Copy`]. If `T` is not [`Copy`], using both the returned /// value and the value at `*src` can [violate memory safety][read-ownership]. /// -/// Note that even if `T` has size `0`, the pointer must be non-null. -/// /// [read-ownership]: read#ownership-of-the-returned-value /// [valid]: self#safety /// @@ -1611,8 +1609,6 @@ pub const unsafe fn write(dst: *mut T, src: T) { /// /// * `dst` must be [valid] for writes. /// -/// Note that even if `T` has size `0`, the pointer must be non-null. 
-/// /// [valid]: self#safety /// /// ## On `packed` structs From 54e33bbdeca62508a71c0e445f1d1c82eb0b48c3 Mon Sep 17 00:00:00 2001 From: Michael Goulet Date: Tue, 31 Dec 2024 04:53:00 +0000 Subject: [PATCH 6/8] Account for C string literals in HiddenUnicodeCodepoints lint --- .../src/hidden_unicode_codepoints.rs | 24 +++++-- tests/ui/parser/unicode-control-codepoints.rs | 7 ++ .../parser/unicode-control-codepoints.stderr | 68 ++++++++++++++----- 3 files changed, 76 insertions(+), 23 deletions(-) diff --git a/compiler/rustc_lint/src/hidden_unicode_codepoints.rs b/compiler/rustc_lint/src/hidden_unicode_codepoints.rs index 28368e1ab462b..4a7e4bf75cf3c 100644 --- a/compiler/rustc_lint/src/hidden_unicode_codepoints.rs +++ b/compiler/rustc_lint/src/hidden_unicode_codepoints.rs @@ -101,14 +101,28 @@ impl EarlyLintPass for HiddenUnicodeCodepoints { if !contains_text_flow_control_chars(text.as_str()) { return; } - let padding = match token_lit.kind { + let (padding, point_at_inner_spans) = match token_lit.kind { // account for `"` or `'` - ast::token::LitKind::Str | ast::token::LitKind::Char => 1, + ast::token::LitKind::Str | ast::token::LitKind::Char => (1, true), + // account for `c"` + ast::token::LitKind::CStr => (2, true), // account for `r###"` - ast::token::LitKind::StrRaw(n) => n as u32 + 2, - _ => return, + ast::token::LitKind::StrRaw(n) => (n as u32 + 2, true), + // account for `cr###"` + ast::token::LitKind::CStrRaw(n) => (n as u32 + 3, true), + // suppress bad literals. + ast::token::LitKind::Err(_) => return, + // Be conservative just in case new literals do support these. 
+ _ => (0, false), }; - self.lint_text_direction_codepoint(cx, text, expr.span, padding, true, "literal"); + self.lint_text_direction_codepoint( + cx, + text, + expr.span, + padding, + point_at_inner_spans, + "literal", + ); } _ => {} }; diff --git a/tests/ui/parser/unicode-control-codepoints.rs b/tests/ui/parser/unicode-control-codepoints.rs index df099bb62ad1e..c2b9a9911ac5d 100644 --- a/tests/ui/parser/unicode-control-codepoints.rs +++ b/tests/ui/parser/unicode-control-codepoints.rs @@ -1,3 +1,5 @@ +//@ edition: 2021 + fn main() { // if access_level != "us‫e‪r" { // Check if admin //~^ ERROR unicode codepoint changing visible direction of text present in comment @@ -25,6 +27,11 @@ fn main() { //~| ERROR non-ASCII character in raw byte string literal println!("{:?}", '‮'); //~^ ERROR unicode codepoint changing visible direction of text present in literal + + let _ = c"‮"; + //~^ ERROR unicode codepoint changing visible direction of text present in literal + let _ = cr#"‮"#; + //~^ ERROR unicode codepoint changing visible direction of text present in literal } //"/*‮ } ⁦if isAdmin⁩ ⁦ begin admins only */" diff --git a/tests/ui/parser/unicode-control-codepoints.stderr b/tests/ui/parser/unicode-control-codepoints.stderr index 28de4ae72abbd..fa75df6a443ad 100644 --- a/tests/ui/parser/unicode-control-codepoints.stderr +++ b/tests/ui/parser/unicode-control-codepoints.stderr @@ -1,5 +1,5 @@ error: unicode escape in byte string - --> $DIR/unicode-control-codepoints.rs:6:26 + --> $DIR/unicode-control-codepoints.rs:8:26 | LL | println!("{:?}", b"us\u{202B}e\u{202A}r"); | ^^^^^^^^ unicode escape in byte string @@ -7,7 +7,7 @@ LL | println!("{:?}", b"us\u{202B}e\u{202A}r"); = help: unicode escape sequences cannot be used as a byte or in a byte string error: unicode escape in byte string - --> $DIR/unicode-control-codepoints.rs:6:35 + --> $DIR/unicode-control-codepoints.rs:8:35 | LL | println!("{:?}", b"us\u{202B}e\u{202A}r"); | ^^^^^^^^ unicode escape in byte string @@ -15,7 
+15,7 @@ LL | println!("{:?}", b"us\u{202B}e\u{202A}r"); = help: unicode escape sequences cannot be used as a byte or in a byte string error: non-ASCII character in byte string literal - --> $DIR/unicode-control-codepoints.rs:16:26 + --> $DIR/unicode-control-codepoints.rs:18:26 | LL | println!("{:?}", b"/*� } �if isAdmin� � begin admins only "); | ^ must be ASCII but is '\u{202e}' @@ -26,7 +26,7 @@ LL | println!("{:?}", b"/*\xE2\x80\xAE } �if isAdmin� � begin admins o | ~~~~~~~~~~~~ error: non-ASCII character in byte string literal - --> $DIR/unicode-control-codepoints.rs:16:30 + --> $DIR/unicode-control-codepoints.rs:18:30 | LL | println!("{:?}", b"/*� } �if isAdmin� � begin admins only "); | ^ must be ASCII but is '\u{2066}' @@ -37,7 +37,7 @@ LL | println!("{:?}", b"/*� } \xE2\x81\xA6if isAdmin� � begin admins o | ~~~~~~~~~~~~ error: non-ASCII character in byte string literal - --> $DIR/unicode-control-codepoints.rs:16:41 + --> $DIR/unicode-control-codepoints.rs:18:41 | LL | println!("{:?}", b"/*� } �if isAdmin� � begin admins only "); | ^ must be ASCII but is '\u{2069}' @@ -48,7 +48,7 @@ LL | println!("{:?}", b"/*� } �if isAdmin\xE2\x81\xA9 � begin admins o | ~~~~~~~~~~~~ error: non-ASCII character in byte string literal - --> $DIR/unicode-control-codepoints.rs:16:43 + --> $DIR/unicode-control-codepoints.rs:18:43 | LL | println!("{:?}", b"/*� } �if isAdmin� � begin admins only "); | ^ must be ASCII but is '\u{2066}' @@ -59,31 +59,31 @@ LL | println!("{:?}", b"/*� } �if isAdmin� \xE2\x81\xA6 begin admins o | ~~~~~~~~~~~~ error: non-ASCII character in raw byte string literal - --> $DIR/unicode-control-codepoints.rs:21:29 + --> $DIR/unicode-control-codepoints.rs:23:29 | LL | println!("{:?}", br##"/*� } �if isAdmin� � begin admins only "##); | ^ must be ASCII but is '\u{202e}' error: non-ASCII character in raw byte string literal - --> $DIR/unicode-control-codepoints.rs:21:33 + --> $DIR/unicode-control-codepoints.rs:23:33 | LL | println!("{:?}", br##"/*� } �if 
isAdmin� � begin admins only "##); | ^ must be ASCII but is '\u{2066}' error: non-ASCII character in raw byte string literal - --> $DIR/unicode-control-codepoints.rs:21:44 + --> $DIR/unicode-control-codepoints.rs:23:44 | LL | println!("{:?}", br##"/*� } �if isAdmin� � begin admins only "##); | ^ must be ASCII but is '\u{2069}' error: non-ASCII character in raw byte string literal - --> $DIR/unicode-control-codepoints.rs:21:46 + --> $DIR/unicode-control-codepoints.rs:23:46 | LL | println!("{:?}", br##"/*� } �if isAdmin� � begin admins only "##); | ^ must be ASCII but is '\u{2066}' error: unicode codepoint changing visible direction of text present in comment - --> $DIR/unicode-control-codepoints.rs:2:5 + --> $DIR/unicode-control-codepoints.rs:4:5 | LL | // if access_level != "us�e�r" { // Check if admin | ^^^^^^^^^^^^^^^^^^^^^^^^^-^-^^^^^^^^^^^^^^^^^^^^^^ @@ -97,7 +97,7 @@ LL | // if access_level != "us�e�r" { // Check if admin = help: if their presence wasn't intentional, you can remove them error: unicode codepoint changing visible direction of text present in comment - --> $DIR/unicode-control-codepoints.rs:30:1 + --> $DIR/unicode-control-codepoints.rs:37:1 | LL | //"/*� } �if isAdmin� � begin admins only */" | ^^^^^-^^^-^^^^^^^^^^-^-^^^^^^^^^^^^^^^^^^^^^^ @@ -112,7 +112,7 @@ LL | //"/*� } �if isAdmin� � begin admins only */" = help: if their presence wasn't intentional, you can remove them error: unicode codepoint changing visible direction of text present in literal - --> $DIR/unicode-control-codepoints.rs:11:22 + --> $DIR/unicode-control-codepoints.rs:13:22 | LL | println!("{:?}", "/*� } �if isAdmin� � begin admins only "); | ^^^-^^^-^^^^^^^^^^-^-^^^^^^^^^^^^^^^^^^^^ @@ -132,7 +132,7 @@ LL | println!("{:?}", "/*\u{202e} } \u{2066}if isAdmin\u{2069} \u{2066} begi | ~~~~~~~~ ~~~~~~~~ ~~~~~~~~ ~~~~~~~~ error: unicode codepoint changing visible direction of text present in literal - --> $DIR/unicode-control-codepoints.rs:14:22 + --> 
$DIR/unicode-control-codepoints.rs:16:22 | LL | println!("{:?}", r##"/*� } �if isAdmin� � begin admins only "##); | ^^^^^^-^^^-^^^^^^^^^^-^-^^^^^^^^^^^^^^^^^^^^^^ @@ -151,7 +151,7 @@ LL | println!("{:?}", r##"/*\u{202e} } \u{2066}if isAdmin\u{2069} \u{2066} b | ~~~~~~~~ ~~~~~~~~ ~~~~~~~~ ~~~~~~~~ error: unicode codepoint changing visible direction of text present in literal - --> $DIR/unicode-control-codepoints.rs:26:22 + --> $DIR/unicode-control-codepoints.rs:28:22 | LL | println!("{:?}", '�'); | ^-^ @@ -166,8 +166,40 @@ help: if you want to keep them but make them visible in your source code, you ca LL | println!("{:?}", '\u{202e}'); | ~~~~~~~~ +error: unicode codepoint changing visible direction of text present in literal + --> $DIR/unicode-control-codepoints.rs:31:13 + | +LL | let _ = c"�"; + | ^^-^ + | | | + | | '\u{202e}' + | this literal contains an invisible unicode text flow control codepoint + | + = note: these kind of unicode codepoints change the way text flows on applications that support them, but can cause confusion because they change the order of characters on the screen + = help: if their presence wasn't intentional, you can remove them +help: if you want to keep them but make them visible in your source code, you can escape them + | +LL | let _ = c"\u{202e}"; + | ~~~~~~~~ + +error: unicode codepoint changing visible direction of text present in literal + --> $DIR/unicode-control-codepoints.rs:33:13 + | +LL | let _ = cr#"�"#; + | ^^^^-^^ + | | | + | | '\u{202e}' + | this literal contains an invisible unicode text flow control codepoint + | + = note: these kind of unicode codepoints change the way text flows on applications that support them, but can cause confusion because they change the order of characters on the screen + = help: if their presence wasn't intentional, you can remove them +help: if you want to keep them but make them visible in your source code, you can escape them + | +LL | let _ = cr#"\u{202e}"#; + | ~~~~~~~~ + error: unicode 
codepoint changing visible direction of text present in doc comment - --> $DIR/unicode-control-codepoints.rs:33:1 + --> $DIR/unicode-control-codepoints.rs:40:1 | LL | /** '�'); */fn foo() {} | ^^^^^^^^^^^^^ this doc comment contains an invisible unicode text flow control codepoint @@ -177,7 +209,7 @@ LL | /** '�'); */fn foo() {} = note: if you want to keep them but make them visible in your source code, you can escape them: '\u{202e}' error: unicode codepoint changing visible direction of text present in doc comment - --> $DIR/unicode-control-codepoints.rs:36:1 + --> $DIR/unicode-control-codepoints.rs:43:1 | LL | / /** LL | | * @@ -188,5 +220,5 @@ LL | | * '�'); */fn bar() {} = note: if their presence wasn't intentional, you can remove them = note: if you want to keep them but make them visible in your source code, you can escape them: '\u{202e}' -error: aborting due to 17 previous errors +error: aborting due to 19 previous errors From c6afe82b8a3255145ba0eeeb49f8c590e38f38e2 Mon Sep 17 00:00:00 2001 From: Michael Goulet Date: Tue, 31 Dec 2024 04:15:40 +0000 Subject: [PATCH 7/8] Make parsed string literal fields named --- compiler/rustc_builtin_macros/src/asm.rs | 8 ++++++-- compiler/rustc_builtin_macros/src/format.rs | 6 +++--- compiler/rustc_builtin_macros/src/util.rs | 18 ++++++++++++++---- 3 files changed, 23 insertions(+), 9 deletions(-) diff --git a/compiler/rustc_builtin_macros/src/asm.rs b/compiler/rustc_builtin_macros/src/asm.rs index 6ae697d4030db..238cc14ff0b0a 100644 --- a/compiler/rustc_builtin_macros/src/asm.rs +++ b/compiler/rustc_builtin_macros/src/asm.rs @@ -16,7 +16,7 @@ use smallvec::smallvec; use {rustc_ast as ast, rustc_parse_format as parse}; use crate::errors; -use crate::util::expr_to_spanned_string; +use crate::util::{ExprToSpannedString, expr_to_spanned_string}; pub struct AsmArgs { pub templates: Vec>, @@ -527,7 +527,11 @@ fn expand_preparsed_asm( let msg = "asm template must be a string literal"; let template_sp = template_expr.span; let 
template_is_mac_call = matches!(template_expr.kind, ast::ExprKind::MacCall(_)); - let (template_str, template_style, template_span) = { + let ExprToSpannedString { + symbol: template_str, + style: template_style, + span: template_span, + } = { let ExpandResult::Ready(mac) = expr_to_spanned_string(ecx, template_expr, msg) else { return ExpandResult::Retry(()); }; diff --git a/compiler/rustc_builtin_macros/src/format.rs b/compiler/rustc_builtin_macros/src/format.rs index 528eb7725f5c8..5b3f08948a92d 100644 --- a/compiler/rustc_builtin_macros/src/format.rs +++ b/compiler/rustc_builtin_macros/src/format.rs @@ -17,7 +17,7 @@ use rustc_parse_format as parse; use rustc_span::{BytePos, ErrorGuaranteed, Ident, InnerSpan, Span, Symbol}; use crate::errors; -use crate::util::expr_to_spanned_string; +use crate::util::{ExprToSpannedString, expr_to_spanned_string}; // The format_args!() macro is expanded in three steps: // 1. First, `parse_args` will parse the `(literal, arg, arg, name=arg, name=arg)` syntax, @@ -166,13 +166,13 @@ fn make_format_args( let MacroInput { fmtstr: efmt, mut args, is_direct_literal } = input; - let (fmt_str, fmt_style, fmt_span) = { + let ExprToSpannedString { symbol: fmt_str, span: fmt_span, style: fmt_style } = { let ExpandResult::Ready(mac) = expr_to_spanned_string(ecx, efmt.clone(), msg) else { return ExpandResult::Retry(()); }; match mac { Ok(mut fmt) if append_newline => { - fmt.0 = Symbol::intern(&format!("{}\n", fmt.0)); + fmt.symbol = Symbol::intern(&format!("{}\n", fmt.symbol)); fmt } Ok(fmt) => fmt, diff --git a/compiler/rustc_builtin_macros/src/util.rs b/compiler/rustc_builtin_macros/src/util.rs index be12d21a80000..9162e94eddb85 100644 --- a/compiler/rustc_builtin_macros/src/util.rs +++ b/compiler/rustc_builtin_macros/src/util.rs @@ -57,7 +57,13 @@ pub(crate) fn warn_on_duplicate_attribute(ecx: &ExtCtxt<'_>, item: &Annotatable, /// `Ok` represents successfully retrieving the string literal at the correct /// position, e.g., 
`println("abc")`. -type ExprToSpannedStringResult<'a> = Result<(Symbol, ast::StrStyle, Span), UnexpectedExprKind<'a>>; +pub(crate) type ExprToSpannedStringResult<'a> = Result>; + +pub(crate) struct ExprToSpannedString { + pub symbol: Symbol, + pub style: ast::StrStyle, + pub span: Span, +} /// - `Ok` is returned when the conversion to a string literal is unsuccessful, /// but another type of expression is obtained instead. @@ -90,7 +96,11 @@ pub(crate) fn expr_to_spanned_string<'a>( ExpandResult::Ready(Err(match expr.kind { ast::ExprKind::Lit(token_lit) => match ast::LitKind::from_token_lit(token_lit) { Ok(ast::LitKind::Str(s, style)) => { - return ExpandResult::Ready(Ok((s, style, expr.span))); + return ExpandResult::Ready(Ok(ExprToSpannedString { + symbol: s, + style, + span: expr.span, + })); } Ok(ast::LitKind::ByteStr(..)) => { let mut err = cx.dcx().struct_span_err(expr.span, err_msg); @@ -128,7 +138,7 @@ pub(crate) fn expr_to_string( Ok((err, _)) => err.emit(), Err(guar) => guar, }) - .map(|(symbol, style, _)| (symbol, style)) + .map(|ExprToSpannedString { symbol, style, .. }| (symbol, style)) }) } @@ -183,7 +193,7 @@ pub(crate) fn get_single_str_spanned_from_tts( Ok((err, _)) => err.emit(), Err(guar) => guar, }) - .map(|(symbol, _style, span)| (symbol, span)) + .map(|ExprToSpannedString { symbol, span, .. 
}| (symbol, span)) }) } From ea291e5b5f5c2562fec89a11444e0dc4388565cf Mon Sep 17 00:00:00 2001 From: Michael Goulet Date: Tue, 31 Dec 2024 05:03:22 +0000 Subject: [PATCH 8/8] Account for format_args in HiddenUnicodeCodepoints lint --- compiler/rustc_ast/src/format.rs | 5 ++ compiler/rustc_ast/src/mut_visit.rs | 2 +- compiler/rustc_ast/src/visit.rs | 2 +- compiler/rustc_builtin_macros/src/asm.rs | 1 + compiler/rustc_builtin_macros/src/format.rs | 14 ++++- compiler/rustc_builtin_macros/src/util.rs | 5 ++ .../src/hidden_unicode_codepoints.rs | 60 +++++++++++-------- tests/ui/parser/unicode-control-codepoints.rs | 3 + .../parser/unicode-control-codepoints.stderr | 24 ++++++-- 9 files changed, 82 insertions(+), 34 deletions(-) diff --git a/compiler/rustc_ast/src/format.rs b/compiler/rustc_ast/src/format.rs index de628f098532f..b93846c1fe6f3 100644 --- a/compiler/rustc_ast/src/format.rs +++ b/compiler/rustc_ast/src/format.rs @@ -4,6 +4,7 @@ use rustc_span::{Ident, Span, Symbol}; use crate::Expr; use crate::ptr::P; +use crate::token::LitKind; // Definitions: // @@ -45,6 +46,10 @@ pub struct FormatArgs { pub span: Span, pub template: Vec, pub arguments: FormatArguments, + /// The raw, un-split format string literal, with no escaping or processing. + /// + /// Generally only useful for lints that care about the raw bytes the user wrote. + pub uncooked_fmt_str: (LitKind, Symbol), } /// A piece of a format template string. diff --git a/compiler/rustc_ast/src/mut_visit.rs b/compiler/rustc_ast/src/mut_visit.rs index 995924c2a2949..04cdfc93dcb15 100644 --- a/compiler/rustc_ast/src/mut_visit.rs +++ b/compiler/rustc_ast/src/mut_visit.rs @@ -1596,7 +1596,7 @@ fn walk_inline_asm_sym( fn walk_format_args(vis: &mut T, fmt: &mut FormatArgs) { // FIXME: visit the template exhaustively. 
- let FormatArgs { span, template: _, arguments } = fmt; + let FormatArgs { span, template: _, arguments, uncooked_fmt_str: _ } = fmt; for FormatArgument { kind, expr } in arguments.all_args_mut() { match kind { FormatArgumentKind::Named(ident) | FormatArgumentKind::Captured(ident) => { diff --git a/compiler/rustc_ast/src/visit.rs b/compiler/rustc_ast/src/visit.rs index c7cc772dabb7e..e99fc7b604e31 100644 --- a/compiler/rustc_ast/src/visit.rs +++ b/compiler/rustc_ast/src/visit.rs @@ -1061,7 +1061,7 @@ pub fn walk_inline_asm_sym<'a, V: Visitor<'a>>( } pub fn walk_format_args<'a, V: Visitor<'a>>(visitor: &mut V, fmt: &'a FormatArgs) -> V::Result { - let FormatArgs { span: _, template: _, arguments } = fmt; + let FormatArgs { span: _, template: _, arguments, uncooked_fmt_str: _ } = fmt; for FormatArgument { kind, expr } in arguments.all_args() { match kind { FormatArgumentKind::Named(ident) | FormatArgumentKind::Captured(ident) => { diff --git a/compiler/rustc_builtin_macros/src/asm.rs b/compiler/rustc_builtin_macros/src/asm.rs index 238cc14ff0b0a..5062cf55bb9ad 100644 --- a/compiler/rustc_builtin_macros/src/asm.rs +++ b/compiler/rustc_builtin_macros/src/asm.rs @@ -531,6 +531,7 @@ fn expand_preparsed_asm( symbol: template_str, style: template_style, span: template_span, + .. 
} = { let ExpandResult::Ready(mac) = expr_to_spanned_string(ecx, template_expr, msg) else { return ExpandResult::Retry(()); diff --git a/compiler/rustc_builtin_macros/src/format.rs b/compiler/rustc_builtin_macros/src/format.rs index 5b3f08948a92d..0112499c50949 100644 --- a/compiler/rustc_builtin_macros/src/format.rs +++ b/compiler/rustc_builtin_macros/src/format.rs @@ -166,7 +166,12 @@ fn make_format_args( let MacroInput { fmtstr: efmt, mut args, is_direct_literal } = input; - let ExprToSpannedString { symbol: fmt_str, span: fmt_span, style: fmt_style } = { + let ExprToSpannedString { + symbol: fmt_str, + span: fmt_span, + style: fmt_style, + uncooked_symbol: uncooked_fmt_str, + } = { let ExpandResult::Ready(mac) = expr_to_spanned_string(ecx, efmt.clone(), msg) else { return ExpandResult::Retry(()); }; @@ -584,7 +589,12 @@ fn make_format_args( } } - ExpandResult::Ready(Ok(FormatArgs { span: fmt_span, template, arguments: args })) + ExpandResult::Ready(Ok(FormatArgs { + span: fmt_span, + template, + arguments: args, + uncooked_fmt_str, + })) } fn invalid_placeholder_type_error( diff --git a/compiler/rustc_builtin_macros/src/util.rs b/compiler/rustc_builtin_macros/src/util.rs index 9162e94eddb85..38fec2bff14c8 100644 --- a/compiler/rustc_builtin_macros/src/util.rs +++ b/compiler/rustc_builtin_macros/src/util.rs @@ -63,6 +63,10 @@ pub(crate) struct ExprToSpannedString { pub symbol: Symbol, pub style: ast::StrStyle, pub span: Span, + /// The raw string literal, with no escaping or processing. + /// + /// Generally only useful for lints that care about the raw bytes the user wrote. 
+ pub uncooked_symbol: (ast::token::LitKind, Symbol), } /// - `Ok` is returned when the conversion to a string literal is unsuccessful, @@ -100,6 +104,7 @@ pub(crate) fn expr_to_spanned_string<'a>( symbol: s, style, span: expr.span, + uncooked_symbol: (token_lit.kind, token_lit.symbol), })); } Ok(ast::LitKind::ByteStr(..)) => { diff --git a/compiler/rustc_lint/src/hidden_unicode_codepoints.rs b/compiler/rustc_lint/src/hidden_unicode_codepoints.rs index 4a7e4bf75cf3c..406aa1005dfba 100644 --- a/compiler/rustc_lint/src/hidden_unicode_codepoints.rs +++ b/compiler/rustc_lint/src/hidden_unicode_codepoints.rs @@ -82,7 +82,36 @@ impl HiddenUnicodeCodepoints { sub, }); } + + fn check_literal( + &mut self, + cx: &EarlyContext<'_>, + text: Symbol, + lit_kind: ast::token::LitKind, + span: Span, + label: &'static str, + ) { + if !contains_text_flow_control_chars(text.as_str()) { + return; + } + let (padding, point_at_inner_spans) = match lit_kind { + // account for `"` or `'` + ast::token::LitKind::Str | ast::token::LitKind::Char => (1, true), + // account for `c"` + ast::token::LitKind::CStr => (2, true), + // account for `r###"` + ast::token::LitKind::StrRaw(n) => (n as u32 + 2, true), + // account for `cr###"` + ast::token::LitKind::CStrRaw(n) => (n as u32 + 3, true), + // suppress bad literals. + ast::token::LitKind::Err(_) => return, + // Be conservative just in case new literals do support these. 
+ _ => (0, false), + }; + self.lint_text_direction_codepoint(cx, text, span, padding, point_at_inner_spans, label); + } } + impl EarlyLintPass for HiddenUnicodeCodepoints { fn check_attribute(&mut self, cx: &EarlyContext<'_>, attr: &ast::Attribute) { if let ast::AttrKind::DocComment(_, comment) = attr.kind { @@ -97,32 +126,11 @@ impl EarlyLintPass for HiddenUnicodeCodepoints { // byte strings are already handled well enough by `EscapeError::NonAsciiCharInByteString` match &expr.kind { ast::ExprKind::Lit(token_lit) => { - let text = token_lit.symbol; - if !contains_text_flow_control_chars(text.as_str()) { - return; - } - let (padding, point_at_inner_spans) = match token_lit.kind { - // account for `"` or `'` - ast::token::LitKind::Str | ast::token::LitKind::Char => (1, true), - // account for `c"` - ast::token::LitKind::CStr => (2, true), - // account for `r###"` - ast::token::LitKind::StrRaw(n) => (n as u32 + 2, true), - // account for `cr###"` - ast::token::LitKind::CStrRaw(n) => (n as u32 + 3, true), - // suppress bad literals. - ast::token::LitKind::Err(_) => return, - // Be conservative just in case new literals do support these. 
- _ => (0, false), - }; - self.lint_text_direction_codepoint( - cx, - text, - expr.span, - padding, - point_at_inner_spans, - "literal", - ); + self.check_literal(cx, token_lit.symbol, token_lit.kind, expr.span, "literal"); + } + ast::ExprKind::FormatArgs(args) => { + let (lit_kind, text) = args.uncooked_fmt_str; + self.check_literal(cx, text, lit_kind, args.span, "format string"); } _ => {} }; diff --git a/tests/ui/parser/unicode-control-codepoints.rs b/tests/ui/parser/unicode-control-codepoints.rs index c2b9a9911ac5d..14e1cfe59d39a 100644 --- a/tests/ui/parser/unicode-control-codepoints.rs +++ b/tests/ui/parser/unicode-control-codepoints.rs @@ -32,6 +32,9 @@ fn main() { //~^ ERROR unicode codepoint changing visible direction of text present in literal let _ = cr#"‮"#; //~^ ERROR unicode codepoint changing visible direction of text present in literal + + println!("{{‮}}"); + //~^ ERROR unicode codepoint changing visible direction of text present in format string } //"/*‮ } ⁦if isAdmin⁩ ⁦ begin admins only */" diff --git a/tests/ui/parser/unicode-control-codepoints.stderr b/tests/ui/parser/unicode-control-codepoints.stderr index fa75df6a443ad..2893194308ed9 100644 --- a/tests/ui/parser/unicode-control-codepoints.stderr +++ b/tests/ui/parser/unicode-control-codepoints.stderr @@ -97,7 +97,7 @@ LL | // if access_level != "us�e�r" { // Check if admin = help: if their presence wasn't intentional, you can remove them error: unicode codepoint changing visible direction of text present in comment - --> $DIR/unicode-control-codepoints.rs:37:1 + --> $DIR/unicode-control-codepoints.rs:40:1 | LL | //"/*� } �if isAdmin� � begin admins only */" | ^^^^^-^^^-^^^^^^^^^^-^-^^^^^^^^^^^^^^^^^^^^^^ @@ -198,8 +198,24 @@ help: if you want to keep them but make them visible in your source code, you ca LL | let _ = cr#"\u{202e}"#; | ~~~~~~~~ +error: unicode codepoint changing visible direction of text present in format string + --> $DIR/unicode-control-codepoints.rs:36:14 + | +LL | 
println!("{{�}}"); + | ^^^-^^^ + | | | + | | '\u{202e}' + | this format string contains an invisible unicode text flow control codepoint + | + = note: these kind of unicode codepoints change the way text flows on applications that support them, but can cause confusion because they change the order of characters on the screen + = help: if their presence wasn't intentional, you can remove them +help: if you want to keep them but make them visible in your source code, you can escape them + | +LL | println!("{{\u{202e}}}"); + | ~~~~~~~~ + error: unicode codepoint changing visible direction of text present in doc comment - --> $DIR/unicode-control-codepoints.rs:40:1 + --> $DIR/unicode-control-codepoints.rs:43:1 | LL | /** '�'); */fn foo() {} | ^^^^^^^^^^^^^ this doc comment contains an invisible unicode text flow control codepoint @@ -209,7 +225,7 @@ LL | /** '�'); */fn foo() {} = note: if you want to keep them but make them visible in your source code, you can escape them: '\u{202e}' error: unicode codepoint changing visible direction of text present in doc comment - --> $DIR/unicode-control-codepoints.rs:43:1 + --> $DIR/unicode-control-codepoints.rs:46:1 | LL | / /** LL | | * @@ -220,5 +236,5 @@ LL | | * '�'); */fn bar() {} = note: if their presence wasn't intentional, you can remove them = note: if you want to keep them but make them visible in your source code, you can escape them: '\u{202e}' -error: aborting due to 19 previous errors +error: aborting due to 20 previous errors