Skip to content

Commit

Permalink
Rollup merge of #134956 - compiler-errors:format-args-hidden-chars, r…
Browse files Browse the repository at this point in the history
…=jieyouxu

Account for C string literals and `format_args` in `HiddenUnicodeCodepoints` lint

This is stacked on #134955, and either that can land first or both of them can land together here. I split this out because this is a bit more involved of an impl.

Fixes #94945
  • Loading branch information
matthiaskrgr authored Dec 31, 2024
2 parents 852440b + ea291e5 commit 0c94f63
Show file tree
Hide file tree
Showing 9 changed files with 157 additions and 42 deletions.
5 changes: 5 additions & 0 deletions compiler/rustc_ast/src/format.rs
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@ use rustc_span::{Ident, Span, Symbol};

use crate::Expr;
use crate::ptr::P;
use crate::token::LitKind;

// Definitions:
//
Expand Down Expand Up @@ -45,6 +46,10 @@ pub struct FormatArgs {
pub span: Span,
pub template: Vec<FormatArgsPiece>,
pub arguments: FormatArguments,
/// The raw, un-split format string literal, with no escaping or processing.
///
/// Generally only useful for lints that care about the raw bytes the user wrote.
pub uncooked_fmt_str: (LitKind, Symbol),
}

/// A piece of a format template string.
Expand Down
2 changes: 1 addition & 1 deletion compiler/rustc_ast/src/mut_visit.rs
Original file line number Diff line number Diff line change
Expand Up @@ -1596,7 +1596,7 @@ fn walk_inline_asm_sym<T: MutVisitor>(

fn walk_format_args<T: MutVisitor>(vis: &mut T, fmt: &mut FormatArgs) {
// FIXME: visit the template exhaustively.
let FormatArgs { span, template: _, arguments } = fmt;
let FormatArgs { span, template: _, arguments, uncooked_fmt_str: _ } = fmt;
for FormatArgument { kind, expr } in arguments.all_args_mut() {
match kind {
FormatArgumentKind::Named(ident) | FormatArgumentKind::Captured(ident) => {
Expand Down
2 changes: 1 addition & 1 deletion compiler/rustc_ast/src/visit.rs
Original file line number Diff line number Diff line change
Expand Up @@ -1061,7 +1061,7 @@ pub fn walk_inline_asm_sym<'a, V: Visitor<'a>>(
}

pub fn walk_format_args<'a, V: Visitor<'a>>(visitor: &mut V, fmt: &'a FormatArgs) -> V::Result {
let FormatArgs { span: _, template: _, arguments } = fmt;
let FormatArgs { span: _, template: _, arguments, uncooked_fmt_str: _ } = fmt;
for FormatArgument { kind, expr } in arguments.all_args() {
match kind {
FormatArgumentKind::Named(ident) | FormatArgumentKind::Captured(ident) => {
Expand Down
9 changes: 7 additions & 2 deletions compiler/rustc_builtin_macros/src/asm.rs
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,7 @@ use smallvec::smallvec;
use {rustc_ast as ast, rustc_parse_format as parse};

use crate::errors;
use crate::util::expr_to_spanned_string;
use crate::util::{ExprToSpannedString, expr_to_spanned_string};

pub struct AsmArgs {
pub templates: Vec<P<ast::Expr>>,
Expand Down Expand Up @@ -527,7 +527,12 @@ fn expand_preparsed_asm(
let msg = "asm template must be a string literal";
let template_sp = template_expr.span;
let template_is_mac_call = matches!(template_expr.kind, ast::ExprKind::MacCall(_));
let (template_str, template_style, template_span) = {
let ExprToSpannedString {
symbol: template_str,
style: template_style,
span: template_span,
..
} = {
let ExpandResult::Ready(mac) = expr_to_spanned_string(ecx, template_expr, msg) else {
return ExpandResult::Retry(());
};
Expand Down
18 changes: 14 additions & 4 deletions compiler/rustc_builtin_macros/src/format.rs
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,7 @@ use rustc_parse_format as parse;
use rustc_span::{BytePos, ErrorGuaranteed, Ident, InnerSpan, Span, Symbol};

use crate::errors;
use crate::util::expr_to_spanned_string;
use crate::util::{ExprToSpannedString, expr_to_spanned_string};

// The format_args!() macro is expanded in three steps:
// 1. First, `parse_args` will parse the `(literal, arg, arg, name=arg, name=arg)` syntax,
Expand Down Expand Up @@ -166,13 +166,18 @@ fn make_format_args(

let MacroInput { fmtstr: efmt, mut args, is_direct_literal } = input;

let (fmt_str, fmt_style, fmt_span) = {
let ExprToSpannedString {
symbol: fmt_str,
span: fmt_span,
style: fmt_style,
uncooked_symbol: uncooked_fmt_str,
} = {
let ExpandResult::Ready(mac) = expr_to_spanned_string(ecx, efmt.clone(), msg) else {
return ExpandResult::Retry(());
};
match mac {
Ok(mut fmt) if append_newline => {
fmt.0 = Symbol::intern(&format!("{}\n", fmt.0));
fmt.symbol = Symbol::intern(&format!("{}\n", fmt.symbol));
fmt
}
Ok(fmt) => fmt,
Expand Down Expand Up @@ -584,7 +589,12 @@ fn make_format_args(
}
}

ExpandResult::Ready(Ok(FormatArgs { span: fmt_span, template, arguments: args }))
ExpandResult::Ready(Ok(FormatArgs {
span: fmt_span,
template,
arguments: args,
uncooked_fmt_str,
}))
}

fn invalid_placeholder_type_error(
Expand Down
23 changes: 19 additions & 4 deletions compiler/rustc_builtin_macros/src/util.rs
Original file line number Diff line number Diff line change
Expand Up @@ -57,7 +57,17 @@ pub(crate) fn warn_on_duplicate_attribute(ecx: &ExtCtxt<'_>, item: &Annotatable,

/// `Ok` represents successfully retrieving the string literal at the correct
/// position, e.g., `println("abc")`.
type ExprToSpannedStringResult<'a> = Result<(Symbol, ast::StrStyle, Span), UnexpectedExprKind<'a>>;
pub(crate) type ExprToSpannedStringResult<'a> = Result<ExprToSpannedString, UnexpectedExprKind<'a>>;

pub(crate) struct ExprToSpannedString {
pub symbol: Symbol,
pub style: ast::StrStyle,
pub span: Span,
/// The raw string literal, with no escaping or processing.
///
/// Generally only useful for lints that care about the raw bytes the user wrote.
pub uncooked_symbol: (ast::token::LitKind, Symbol),
}

/// - `Ok` is returned when the conversion to a string literal is unsuccessful,
/// but another type of expression is obtained instead.
Expand Down Expand Up @@ -90,7 +100,12 @@ pub(crate) fn expr_to_spanned_string<'a>(
ExpandResult::Ready(Err(match expr.kind {
ast::ExprKind::Lit(token_lit) => match ast::LitKind::from_token_lit(token_lit) {
Ok(ast::LitKind::Str(s, style)) => {
return ExpandResult::Ready(Ok((s, style, expr.span)));
return ExpandResult::Ready(Ok(ExprToSpannedString {
symbol: s,
style,
span: expr.span,
uncooked_symbol: (token_lit.kind, token_lit.symbol),
}));
}
Ok(ast::LitKind::ByteStr(..)) => {
let mut err = cx.dcx().struct_span_err(expr.span, err_msg);
Expand Down Expand Up @@ -128,7 +143,7 @@ pub(crate) fn expr_to_string(
Ok((err, _)) => err.emit(),
Err(guar) => guar,
})
.map(|(symbol, style, _)| (symbol, style))
.map(|ExprToSpannedString { symbol, style, .. }| (symbol, style))
})
}

Expand Down Expand Up @@ -183,7 +198,7 @@ pub(crate) fn get_single_str_spanned_from_tts(
Ok((err, _)) => err.emit(),
Err(guar) => guar,
})
.map(|(symbol, _style, span)| (symbol, span))
.map(|ExprToSpannedString { symbol, span, .. }| (symbol, span))
})
}

Expand Down
46 changes: 34 additions & 12 deletions compiler/rustc_lint/src/hidden_unicode_codepoints.rs
Original file line number Diff line number Diff line change
Expand Up @@ -82,7 +82,36 @@ impl HiddenUnicodeCodepoints {
sub,
});
}

fn check_literal(
&mut self,
cx: &EarlyContext<'_>,
text: Symbol,
lit_kind: ast::token::LitKind,
span: Span,
label: &'static str,
) {
if !contains_text_flow_control_chars(text.as_str()) {
return;
}
let (padding, point_at_inner_spans) = match lit_kind {
// account for `"` or `'`
ast::token::LitKind::Str | ast::token::LitKind::Char => (1, true),
// account for `c"`
ast::token::LitKind::CStr => (2, true),
// account for `r###"`
ast::token::LitKind::StrRaw(n) => (n as u32 + 2, true),
// account for `cr###"`
ast::token::LitKind::CStrRaw(n) => (n as u32 + 3, true),
// suppress bad literals.
ast::token::LitKind::Err(_) => return,
// Be conservative just in case new literals do support these.
_ => (0, false),
};
self.lint_text_direction_codepoint(cx, text, span, padding, point_at_inner_spans, label);
}
}

impl EarlyLintPass for HiddenUnicodeCodepoints {
fn check_attribute(&mut self, cx: &EarlyContext<'_>, attr: &ast::Attribute) {
if let ast::AttrKind::DocComment(_, comment) = attr.kind {
Expand All @@ -97,18 +126,11 @@ impl EarlyLintPass for HiddenUnicodeCodepoints {
// byte strings are already handled well enough by `EscapeError::NonAsciiCharInByteString`
match &expr.kind {
ast::ExprKind::Lit(token_lit) => {
let text = token_lit.symbol;
if !contains_text_flow_control_chars(text.as_str()) {
return;
}
let padding = match token_lit.kind {
// account for `"` or `'`
ast::token::LitKind::Str | ast::token::LitKind::Char => 1,
// account for `r###"`
ast::token::LitKind::StrRaw(n) => n as u32 + 2,
_ => return,
};
self.lint_text_direction_codepoint(cx, text, expr.span, padding, true, "literal");
self.check_literal(cx, token_lit.symbol, token_lit.kind, expr.span, "literal");
}
ast::ExprKind::FormatArgs(args) => {
let (lit_kind, text) = args.uncooked_fmt_str;
self.check_literal(cx, text, lit_kind, args.span, "format string");
}
_ => {}
};
Expand Down
10 changes: 10 additions & 0 deletions tests/ui/parser/unicode-control-codepoints.rs
Original file line number Diff line number Diff line change
@@ -1,3 +1,5 @@
//@ edition: 2021

fn main() {
// if access_level != "us‫e‪r" { // Check if admin
//~^ ERROR unicode codepoint changing visible direction of text present in comment
Expand Down Expand Up @@ -25,6 +27,14 @@ fn main() {
//~| ERROR non-ASCII character in raw byte string literal
println!("{:?}", '‮');
//~^ ERROR unicode codepoint changing visible direction of text present in literal

let _ = c"‮";
//~^ ERROR unicode codepoint changing visible direction of text present in literal
let _ = cr#"‮"#;
//~^ ERROR unicode codepoint changing visible direction of text present in literal

println!("{{‮}}");
//~^ ERROR unicode codepoint changing visible direction of text present in format string
}

//"/*‮ } ⁦if isAdmin⁩ ⁦ begin admins only */"
Expand Down
Loading

0 comments on commit 0c94f63

Please sign in to comment.