Rollup merge of #92908 - dtolnay:rustdoc, r=GuillaumeGomez

Render more readable macro matcher tokens in rustdoc. Follow-up to #92334. This PR lifts some of the token rendering logic from https://github.com/dtolnay/prettyplease into rustdoc so that even the matchers for which a source code snippet is not available (because they are macro-generated, or for any other reason) follow some good baseline assumptions about where spaces are appropriate between the tokens in the macro matcher. The screenshots below show an example of the difference using one of the gnarliest macros I could find. Some things to notice: - In the **before**, notice how a couple of places break in between `$(....)`↵`*`, which is just about the worst possible place that it could break. - In the **before**, the lines that wrapped are weirdly indented by 1 space of indentation relative to column 0. In the **after**, we use the typical way of block indenting in Rust syntax, which is to put the open/close delimiters on their own line and indent their contents by 4 spaces relative to the previous line (so 8 spaces relative to column 0, because the matcher itself is indented by 4 relative to the `macro_rules` header). - In the **after**, macro_rules metavariables like `$tokens:tt` are kept together, which is how just about everybody writing Rust today writes them. ## Before ![Screenshot from 2022-01-14 13-05-53](https://user-images.githubusercontent.com/1940490/149585105-1f182b78-751f-421f-a234-9dbc04fa3bbd.png) ## After ![Screenshot from 2022-01-14 13-06-04](https://user-images.githubusercontent.com/1940490/149585118-d4b52ea7-3e67-4b6e-a12b-31dfb8172f86.png) r? `@camelid`
rust-lang · Jan 30, 2022 · ba01337 · ba01337
2 parents 0610d4f + 039a058
commit ba01337
Show file tree

Hide file tree

Showing 7 changed files with 296 additions and 61 deletions.
diff --git a/compiler/rustc_ast_pretty/src/pp.rs b/compiler/rustc_ast_pretty/src/pp.rs
@@ -457,7 +457,7 @@ impl Printer {
         self.break_offset(n, 0)
     }
 
-    crate fn zerobreak(&mut self) {
+    pub fn zerobreak(&mut self) {
         self.spaces(0)
     }
 

diff --git a/src/librustdoc/clean/mod.rs b/src/librustdoc/clean/mod.rs
@@ -5,6 +5,7 @@ mod auto_trait;
 mod blanket_impl;
 crate mod cfg;
 crate mod inline;
+mod render_macro_matchers;
 mod simplify;
 crate mod types;
 crate mod utils;

diff --git a/src/librustdoc/clean/render_macro_matchers.rs b/src/librustdoc/clean/render_macro_matchers.rs
@@ -0,0 +1,240 @@
+use rustc_ast::token::{self, BinOpToken, DelimToken};
+use rustc_ast::tokenstream::{TokenStream, TokenTree};
+use rustc_ast_pretty::pprust::state::State as Printer;
+use rustc_ast_pretty::pprust::PrintState;
+use rustc_middle::ty::TyCtxt;
+use rustc_session::parse::ParseSess;
+use rustc_span::source_map::FilePathMapping;
+use rustc_span::symbol::{kw, Ident, Symbol};
+use rustc_span::Span;
+
+/// Render a macro matcher for display in an item declaration: the verbatim source
+/// snippet when it reparses to the same tokens, otherwise a pretty-printed form.
+pub(super) fn render_macro_matcher(tcx: TyCtxt<'_>, matcher: &TokenTree) -> String {
+    if let Some(snippet) = snippet_equal_to_token(tcx, matcher) {
+        // If the original source code is known, we display the matcher exactly
+        // as present in the source code.
+        return snippet;
+    }
+
+    // If the matcher is macro-generated, or for some other reason the source
+    // code snippet is not available, we attempt to nicely render the token tree.
+    let mut printer = Printer::new();
+
+    // If the inner ibox fits on one line, we get:
+    //
+    //     macro_rules! macroname {
+    //         (the matcher) => {...};
+    //     }
+    //
+    // If the inner ibox gets wrapped, the cbox will break and get indented:
+    //
+    //     macro_rules! macroname {
+    //         (
+    //             the matcher ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+    //             ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~!
+    //         ) => {...};
+    //     }
+    printer.cbox(8); // outer box: wrapped matcher contents are indented by 8
+    printer.word("(");
+    printer.zerobreak(); // zero-width break point right after the open paren
+    printer.ibox(0); // inner box holding the matcher's own tokens
+    match matcher {
+        TokenTree::Delimited(_span, _delim, tts) => print_tts(&mut printer, tts),
+        // Matcher which is not a Delimited is unexpected and should've failed
+        // to compile, but we render whatever it is wrapped in parens.
+        TokenTree::Token(_) => print_tt(&mut printer, matcher),
+    }
+    printer.end(); // closes the inner ibox
+    printer.break_offset_if_not_bol(0, -4); // presumably outdents `)` by 4 when wrapped
+    printer.word(")");
+    printer.end(); // closes the outer cbox
+    printer.s.eof() // yields the rendered text as the returned String
+}
+
+/// Find the source snippet for this token's Span, reparse it, and return the
+/// snippet if the reparsed TokenTree matches the argument TokenTree, else `None`.
+fn snippet_equal_to_token(tcx: TyCtxt<'_>, matcher: &TokenTree) -> Option<String> {
+    // Find what rustc thinks is the source snippet.
+    // This may not actually be anything meaningful if this matcher was itself
+    // generated by a macro.
+    let source_map = tcx.sess.source_map();
+    let span = matcher.span();
+    let snippet = source_map.span_to_snippet(span).ok()?;
+
+    // Create a Parser.
+    let sess = ParseSess::new(FilePathMapping::empty()); // fresh session, not `tcx.sess`
+    let file_name = source_map.span_to_filename(span);
+    let mut parser = // the snippet is cloned below because it is also the return value
+        match rustc_parse::maybe_new_parser_from_source_str(&sess, file_name, snippet.clone()) {
+            Ok(parser) => parser,
+            Err(diagnostics) => {
+                for mut diagnostic in diagnostics {
+                    diagnostic.cancel(); // NOTE(review): presumably suppresses reporting — confirm
+                }
+                return None;
+            }
+        };
+
+    // Reparse the snippet into token trees; exactly one is expected.
+    let mut reparsed_trees = match parser.parse_all_token_trees() {
+        Ok(reparsed_trees) => reparsed_trees,
+        Err(mut diagnostic) => {
+            diagnostic.cancel(); // NOTE(review): presumably suppresses reporting — confirm
+            return None;
+        }
+    };
+    if reparsed_trees.len() != 1 { // zero or several trees cannot equal the single `matcher`
+        return None;
+    }
+    let reparsed_tree = reparsed_trees.pop().unwrap(); // cannot fail: length checked above
+
+    // Compare against the original tree (`eq_unspanned` presumably ignores spans).
+    if reparsed_tree.eq_unspanned(matcher) { Some(snippet) } else { None }
+}
+
+fn print_tt(printer: &mut Printer<'_>, tt: &TokenTree) {
+    match tt { // one token tree: a lone token, or a delimited group printed recursively
+        TokenTree::Token(token) => {
+            let token_str = printer.token_to_string(token);
+            printer.word(token_str);
+            if let token::DocComment(..) = token.kind {
+                printer.hardbreak() // forced newline; presumably keeps later tokens out of the comment
+            }
+        }
+        TokenTree::Delimited(_span, delim, tts) => {
+            let open_delim = printer.token_kind_to_string(&token::OpenDelim(*delim));
+            printer.word(open_delim);
+            if !tts.is_empty() { // empty groups print with nothing inside: `()`, `[]`, `{}`
+                if *delim == DelimToken::Brace { // only braces get inner padding: `{ a }`
+                    printer.space();
+                }
+                print_tts(printer, tts);
+                if *delim == DelimToken::Brace { // matching padding before the `}`
+                    printer.space();
+                }
+            }
+            let close_delim = printer.token_kind_to_string(&token::CloseDelim(*delim));
+            printer.word(close_delim);
+        }
+    }
+}
+
+fn print_tts(printer: &mut Printer<'_>, tts: &TokenStream) {
+    #[derive(Copy, Clone, PartialEq)]
+    enum State { // summary of what was printed just before the current tree
+        Start, // nothing printed yet in this stream
+        Dollar, // `$`
+        DollarIdent, // `$name`
+        DollarIdentColon, // `$name:`
+        DollarParen, // `$(...)`
+        DollarParenSep, // `$(...)` followed by one separator token
+        Pound, // `#`
+        PoundBang, // `#!`
+        Ident, // non-raw identifier that takes no space before `(` or `[`
+        Other, // anything else
+    }
+
+    use State::*;
+
+    let mut state = Start;
+    for tt in tts.trees() {
+        let (needs_space, next_state) = match &tt { // space before this tree? and the new state
+            TokenTree::Token(tt) => match (state, &tt.kind) { // arm order matters: first match wins
+                (Dollar, token::Ident(..)) => (false, DollarIdent), // `$name`
+                (DollarIdent, token::Colon) => (false, DollarIdentColon), // `$name:`
+                (DollarIdentColon, token::Ident(..)) => (false, Other), // `$name:frag`
+                (
+                    DollarParen,
+                    token::BinOp(BinOpToken::Plus | BinOpToken::Star) | token::Question,
+                ) => (false, Other), // repetition operator: `$(...)*`, `$(...)+`, `$(...)?`
+                (DollarParen, _) => (false, DollarParenSep), // any other token is a separator
+                (DollarParenSep, token::BinOp(BinOpToken::Plus | BinOpToken::Star)) => {
+                    (false, Other) // operator after the separator: `$(...),*`, `$(...),+`
+                }
+                (Pound, token::Not) => (false, PoundBang), // `#!`
+                (_, token::Ident(symbol, /* is_raw */ false))
+                    if !usually_needs_space_between_keyword_and_open_delim(*symbol, tt.span) =>
+                {
+                    (true, Ident) // space before it; a following `(` or `[` will hug it
+                }
+                (_, token::Comma | token::Semi) => (false, Other), // no space before `,` or `;`
+                (_, token::Dollar) => (true, Dollar),
+                (_, token::Pound) => (true, Pound),
+                (_, _) => (true, Other),
+            },
+            TokenTree::Delimited(_, delim, _) => match (state, delim) {
+                (Dollar, DelimToken::Paren) => (false, DollarParen), // `$(...)`
+                (Pound | PoundBang, DelimToken::Bracket) => (false, Other), // `#[...]`, `#![...]`
+                (Ident, DelimToken::Paren | DelimToken::Bracket) => (false, Other), // `f(0)`, `f[0]`
+                (_, _) => (true, Other),
+            },
+        };
+        if state != Start && needs_space { // never a leading space before the first tree
+            printer.space();
+        }
+        print_tt(printer, &tt);
+        state = next_state;
+    }
+}
+
+fn usually_needs_space_between_keyword_and_open_delim(symbol: Symbol, span: Span) -> bool {
+    let ident = Ident { name: symbol, span }; // span: presumably for edition-dependent keywords
+    let is_keyword = ident.is_used_keyword() || ident.is_unused_keyword();
+    if !is_keyword {
+        // An identifier that is not a keyword usually does not need a space
+        // before an open delim. For example: `f(0)` or `f[0]`.
+        return false;
+    }
+
+    match symbol { // only keywords reach this point
+        // No space after keywords that are syntactically an expression. For
+        // example: a tuple struct created with `let _ = Self(0, 0)`, or if
+        // someone has `impl Index<MyStruct> for bool` then `true[MyStruct]`.
+        kw::False | kw::SelfLower | kw::SelfUpper | kw::True => false,
+
+        // No space, as in `let _: fn();`
+        kw::Fn => false,
+
+        // No space, as in `pub(crate) type T;`
+        kw::Pub => false,
+
+        // No space for keywords that can end an expression, as in `fut.await()`
+        // where fut's Output type is `fn()`.
+        kw::Await => false,
+
+        // Otherwise space after keyword. Some examples:
+        //
+        // `expr as [T; 2]`
+        //         ^
+        // `box (tuple,)`
+        //     ^
+        // `break (tuple,)`
+        //       ^
+        // `type T = dyn (Fn() -> dyn Trait) + Send;`
+        //              ^
+        // `for (tuple,) in iter {}`
+        //     ^
+        // `if (tuple,) == v {}`
+        //    ^
+        // `impl [T] {}`
+        //      ^
+        // `for x in [..] {}`
+        //          ^
+        // `let () = unit;`
+        //     ^
+        // `match [x, y] {...}`
+        //       ^
+        // `&mut (x as T)`
+        //      ^
+        // `return [];`
+        //        ^
+        // `fn f<T>() where (): Into<T>`
+        //                 ^
+        // `while (a + b).what() {}`
+        //       ^
+        // `yield [];`
+        //       ^
+        _ => true, // every remaining keyword is written with a space before the delimiter
+    }
+}
diff --git a/src/librustdoc/clean/utils.rs b/src/librustdoc/clean/utils.rs
@@ -1,5 +1,6 @@
 use crate::clean::auto_trait::AutoTraitFinder;
 use crate::clean::blanket_impl::BlanketImplFinder;
+use crate::clean::render_macro_matchers::render_macro_matcher;
 use crate::clean::{
     inline, Clean, Crate, ExternalCrate, Generic, GenericArg, GenericArgs, ImportSource, Item,
     ItemKind, Lifetime, Path, PathSegment, Primitive, PrimitiveType, Type, TypeBinding, Visibility,
@@ -17,8 +18,6 @@ use rustc_hir::def_id::{DefId, LOCAL_CRATE};
 use rustc_middle::mir::interpret::ConstValue;
 use rustc_middle::ty::subst::{GenericArgKind, SubstsRef};
 use rustc_middle::ty::{self, DefIdTree, TyCtxt};
-use rustc_session::parse::ParseSess;
-use rustc_span::source_map::FilePathMapping;
 use rustc_span::symbol::{kw, sym, Symbol};
 use std::fmt::Write as _;
 use std::mem;
@@ -500,57 +499,6 @@ pub(super) fn render_macro_arms<'a>(
     out
 }
 
-/// Render a macro matcher in a format suitable for displaying to the user
-/// as part of an item declaration.
-pub(super) fn render_macro_matcher(tcx: TyCtxt<'_>, matcher: &TokenTree) -> String {
-    if let Some(snippet) = snippet_equal_to_token(tcx, matcher) {
-        snippet
-    } else {
-        rustc_ast_pretty::pprust::tt_to_string(matcher)
-    }
-}
-
-/// Find the source snippet for this token's Span, reparse it, and return the
-/// snippet if the reparsed TokenTree matches the argument TokenTree.
-fn snippet_equal_to_token(tcx: TyCtxt<'_>, matcher: &TokenTree) -> Option<String> {
-    // Find what rustc thinks is the source snippet.
-    // This may not actually be anything meaningful if this matcher was itself
-    // generated by a macro.
-    let source_map = tcx.sess.source_map();
-    let span = matcher.span();
-    let snippet = source_map.span_to_snippet(span).ok()?;
-
-    // Create a Parser.
-    let sess = ParseSess::new(FilePathMapping::empty());
-    let file_name = source_map.span_to_filename(span);
-    let mut parser =
-        match rustc_parse::maybe_new_parser_from_source_str(&sess, file_name, snippet.clone()) {
-            Ok(parser) => parser,
-            Err(diagnostics) => {
-                for mut diagnostic in diagnostics {
-                    diagnostic.cancel();
-                }
-                return None;
-            }
-        };
-
-    // Reparse a single token tree.
-    let mut reparsed_trees = match parser.parse_all_token_trees() {
-        Ok(reparsed_trees) => reparsed_trees,
-        Err(mut diagnostic) => {
-            diagnostic.cancel();
-            return None;
-        }
-    };
-    if reparsed_trees.len() != 1 {
-        return None;
-    }
-    let reparsed_tree = reparsed_trees.pop().unwrap();
-
-    // Compare against the original tree.
-    if reparsed_tree.eq_unspanned(matcher) { Some(snippet) } else { None }
-}
-
 pub(super) fn display_macro_source(
     cx: &mut DocContext<'_>,
     name: Symbol,

diff --git a/src/test/rustdoc/macro-generated-macro.macro_linebreak_pre.html b/src/test/rustdoc/macro-generated-macro.macro_linebreak_pre.html
@@ -0,0 +1,6 @@
+macro_rules! linebreak {
+    (
+        <= 0 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25
+        26 27 28 =>
+    ) => { ... };
+}
diff --git a/src/test/rustdoc/macro-generated-macro.macro_morestuff_pre.html b/src/test/rustdoc/macro-generated-macro.macro_morestuff_pre.html
@@ -0,0 +1,15 @@
+macro_rules! morestuff {
+    (
+        <= "space between most kinds of tokens" : 1 $x + @ :: >>= 'static
+        "no space inside paren or bracket" : (2 a) [2 a] $(2 $a:tt)*
+        "space inside curly brace" : { 2 a }
+        "no space inside empty delimiters" : () [] {}
+        "no space before comma or semicolon" : a, (a), { a }, a; [T; 0];
+        "the three repetition specifiers" : $(@)*, $(@)+, $(@)?
+        "repetition separators" : $(@)|*, $(@)|+, $(@)==*, $(@)static*
+        "plus or star cannot be a repetition separator" : $(@)+ * $(@)* +
+        "no space between ident and paren" : let _ = f(0) + f[0] + Struct {};
+        "space between keyword and paren" : return (a,) & for x in (..)
+        "some special case keywords" : pub(crate), fn() -> u8, Self(0, 0) =>
+    ) => { ... };
+}