Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Rollup of 10 pull requests #104152

Closed
wants to merge 33 commits into from
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
33 commits
Select commit Hold shift + click to select a range
6d45529
Fix #103451, find_width_of_character_at_span return width with 1 when…
chenyukang Oct 25, 2022
8dbd817
Upgrade cc for working is_flag_supported on cross-compiles
palfrey Oct 29, 2022
a9d7cfc
Update cc in Cargo.lock
palfrey Oct 29, 2022
f32e678
Rename some variables.
nnethercote Nov 3, 2022
84ca2c3
Clarify range calculations.
nnethercote Nov 3, 2022
34b32b0
Use `Mode` less.
nnethercote Nov 3, 2022
7dbf2c0
Make non-ASCII errors more consistent.
nnethercote Nov 3, 2022
a21c045
Improve comments.
nnethercote Nov 3, 2022
d963686
Refactor `cook_lexer_literal`.
nnethercote Nov 3, 2022
a203482
Inline and remove `validate_int_literal`.
nnethercote Nov 3, 2022
f8e2cef
Move intra-doc link checks to a separate function.
ehuss Nov 4, 2022
57b2290
Remove reference from the intra-doc link checker.
ehuss Nov 4, 2022
a838952
Remove `unescape_byte_literal`.
nnethercote Nov 4, 2022
43d21b5
Rename some `result` variables as `res`, for consistency.
nnethercote Nov 4, 2022
13d4c61
Place config.toml in current working directory if config not found
ted-tanner Nov 5, 2022
4c3cad0
Add --print=split-debuginfo
kamirr Nov 7, 2022
0c9896b
Fix `const_fn_trait_ref_impl`, add test for it
onestacked Nov 7, 2022
d97fa25
Fix invalid background-image file name
GuillaumeGomez Nov 7, 2022
87c190c
Reworked const fn ref tests
onestacked Nov 7, 2022
66e8a29
Only set config.config to None when using default path
ted-tanner Nov 7, 2022
ae5cc9c
fix: lint against lint functions
Rejyr Nov 8, 2022
d6c97a3
Simplify `unescape_{char,byte}`.
nnethercote Nov 8, 2022
807a7bf
clarify licensing situation of mpsc and spsc queue
pietroalbini Nov 8, 2022
2541408
Rollup merge of #103521 - chenyukang:yukang/fix-103451-avoid-hang, r=…
GuillaumeGomez Nov 8, 2022
05cbd26
Rollup merge of #103744 - palfrey:unwind-upgrade-cc, r=Mark-Simulacrum
GuillaumeGomez Nov 8, 2022
f80e4a2
Rollup merge of #103919 - nnethercote:unescaping-cleanups, r=matklad
GuillaumeGomez Nov 8, 2022
6a6cc6b
Rollup merge of #103952 - ehuss:dont-intra-linkcheck-reference, r=Mar…
GuillaumeGomez Nov 8, 2022
790746e
Rollup merge of #104027 - ted-tanner:issue-103697-fix, r=jyn514
GuillaumeGomez Nov 8, 2022
2f37412
Rollup merge of #104104 - kamirr:master, r=lcnr
GuillaumeGomez Nov 8, 2022
6926c04
Rollup merge of #104113 - ink-feather-org:fix_const_fn_ref_impls, r=c…
GuillaumeGomez Nov 8, 2022
fab18f8
Rollup merge of #104114 - GuillaumeGomez:background-image-path, r=not…
GuillaumeGomez Nov 8, 2022
b088452
Rollup merge of #104132 - Rejyr:rustc_lint-function-lints, r=davidtwco
GuillaumeGomez Nov 8, 2022
c421dac
Rollup merge of #104139 - ferrocene:pa-channel-licensing, r=pnkfelix
GuillaumeGomez Nov 8, 2022
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 2 additions & 2 deletions Cargo.lock
Original file line number Diff line number Diff line change
Expand Up @@ -479,9 +479,9 @@ version = "0.1.0"

[[package]]
name = "cc"
version = "1.0.73"
version = "1.0.74"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "2fff2a6927b3bb87f9595d67196a70493f627687a71d87a0d692242c33f58c11"
checksum = "581f5dba903aac52ea3feb5ec4810848460ee833876f1f9b0fdeab1f19091574"
dependencies = [
"jobserver",
]
Expand Down
29 changes: 11 additions & 18 deletions compiler/rustc_ast/src/util/literal.rs
Original file line number Diff line number Diff line change
Expand Up @@ -2,12 +2,9 @@

use crate::ast::{self, Lit, LitKind};
use crate::token::{self, Token};

use rustc_lexer::unescape::{unescape_byte, unescape_char};
use rustc_lexer::unescape::{unescape_byte_literal, unescape_literal, Mode};
use rustc_lexer::unescape::{byte_from_char, unescape_byte, unescape_char, unescape_literal, Mode};
use rustc_span::symbol::{kw, sym, Symbol};
use rustc_span::Span;

use std::ascii;

pub enum LitError {
Expand Down Expand Up @@ -109,13 +106,11 @@ impl LitKind {
let s = symbol.as_str();
let mut buf = Vec::with_capacity(s.len());
let mut error = Ok(());
unescape_byte_literal(&s, Mode::ByteStr, &mut |_, unescaped_byte| {
match unescaped_byte {
Ok(c) => buf.push(c),
Err(err) => {
if err.is_fatal() {
error = Err(LitError::LexerError);
}
unescape_literal(&s, Mode::ByteStr, &mut |_, c| match c {
Ok(c) => buf.push(byte_from_char(c)),
Err(err) => {
if err.is_fatal() {
error = Err(LitError::LexerError);
}
}
});
Expand All @@ -127,13 +122,11 @@ impl LitKind {
let bytes = if s.contains('\r') {
let mut buf = Vec::with_capacity(s.len());
let mut error = Ok(());
unescape_byte_literal(&s, Mode::RawByteStr, &mut |_, unescaped_byte| {
match unescaped_byte {
Ok(c) => buf.push(c),
Err(err) => {
if err.is_fatal() {
error = Err(LitError::LexerError);
}
unescape_literal(&s, Mode::RawByteStr, &mut |_, c| match c {
Ok(c) => buf.push(byte_from_char(c)),
Err(err) => {
if err.is_fatal() {
error = Err(LitError::LexerError);
}
}
});
Expand Down
11 changes: 11 additions & 0 deletions compiler/rustc_driver/src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -736,6 +736,17 @@ fn print_crate_info(
// Any output here interferes with Cargo's parsing of other printed output
NativeStaticLibs => {}
LinkArgs => {}
SplitDebuginfo => {
use rustc_target::spec::SplitDebuginfo::{Off, Packed, Unpacked};

for split in &[Off, Packed, Unpacked] {
let stable = sess.target.options.supported_split_debuginfo.contains(split);
let unstable_ok = sess.unstable_options();
if stable || unstable_ok {
println!("{}", split);
}
}
}
}
}
Compilation::Stop
Expand Down
10 changes: 5 additions & 5 deletions compiler/rustc_lexer/src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -205,13 +205,13 @@ pub enum RawStrError {
#[derive(Clone, Copy, Debug, PartialEq, Eq, PartialOrd, Ord)]
pub enum Base {
/// Literal starts with "0b".
Binary,
Binary = 2,
/// Literal starts with "0o".
Octal,
/// Literal starts with "0x".
Hexadecimal,
Octal = 8,
/// Literal doesn't contain a prefix.
Decimal,
Decimal = 10,
/// Literal starts with "0x".
Hexadecimal = 16,
}

/// `rustc` allows files to have a shebang, e.g. "#!/usr/bin/rustrun",
Expand Down
130 changes: 51 additions & 79 deletions compiler/rustc_lexer/src/unescape.rs
Original file line number Diff line number Diff line change
Expand Up @@ -52,10 +52,8 @@ pub enum EscapeError {

/// Unicode escape code in byte literal.
UnicodeEscapeInByte,
/// Non-ascii character in byte literal.
/// Non-ascii character in byte literal, byte string literal, or raw byte string literal.
NonAsciiCharInByte,
/// Non-ascii character in byte string literal.
NonAsciiCharInByteString,

/// After a line ending with '\', the next line contains whitespace
/// characters that are not skipped.
Expand All @@ -78,54 +76,33 @@ impl EscapeError {
/// Takes the contents of a literal (without quotes) and produces a
/// sequence of escaped characters or errors.
/// Values are returned by invoking the provided callback.
pub fn unescape_literal<F>(literal_text: &str, mode: Mode, callback: &mut F)
pub fn unescape_literal<F>(src: &str, mode: Mode, callback: &mut F)
where
F: FnMut(Range<usize>, Result<char, EscapeError>),
{
match mode {
Mode::Char | Mode::Byte => {
let mut chars = literal_text.chars();
let result = unescape_char_or_byte(&mut chars, mode);
// The Chars iterator moved forward.
callback(0..(literal_text.len() - chars.as_str().len()), result);
let mut chars = src.chars();
let res = unescape_char_or_byte(&mut chars, mode == Mode::Byte);
callback(0..(src.len() - chars.as_str().len()), res);
}
Mode::Str | Mode::ByteStr => unescape_str_or_byte_str(literal_text, mode, callback),
// NOTE: Raw strings do not perform any explicit character escaping, here we
// only translate CRLF to LF and produce errors on bare CR.
Mode::Str | Mode::ByteStr => unescape_str_or_byte_str(src, mode == Mode::ByteStr, callback),
Mode::RawStr | Mode::RawByteStr => {
unescape_raw_str_or_raw_byte_str(literal_text, mode, callback)
unescape_raw_str_or_raw_byte_str(src, mode == Mode::RawByteStr, callback)
}
}
}

/// Takes a contents of a byte, byte string or raw byte string (without quotes)
/// and produces a sequence of bytes or errors.
/// Values are returned through invoking of the provided callback.
pub fn unescape_byte_literal<F>(literal_text: &str, mode: Mode, callback: &mut F)
where
F: FnMut(Range<usize>, Result<u8, EscapeError>),
{
debug_assert!(mode.is_bytes());
unescape_literal(literal_text, mode, &mut |range, result| {
callback(range, result.map(byte_from_char));
})
}

/// Takes the contents of a char literal (without quotes), and returns an
/// unescaped char or an error
pub fn unescape_char(literal_text: &str) -> Result<char, (usize, EscapeError)> {
let mut chars = literal_text.chars();
unescape_char_or_byte(&mut chars, Mode::Char)
.map_err(|err| (literal_text.len() - chars.as_str().len(), err))
/// unescaped char or an error.
pub fn unescape_char(src: &str) -> Result<char, EscapeError> {
unescape_char_or_byte(&mut src.chars(), false)
}

/// Takes the contents of a byte literal (without quotes), and returns an
/// unescaped byte or an error.
pub fn unescape_byte(literal_text: &str) -> Result<u8, (usize, EscapeError)> {
let mut chars = literal_text.chars();
unescape_char_or_byte(&mut chars, Mode::Byte)
.map(byte_from_char)
.map_err(|err| (literal_text.len() - chars.as_str().len(), err))
pub fn unescape_byte(src: &str) -> Result<u8, EscapeError> {
unescape_char_or_byte(&mut src.chars(), true).map(byte_from_char)
}

/// What kind of literal do we parse.
Expand All @@ -147,20 +124,17 @@ impl Mode {
}
}

pub fn is_bytes(self) -> bool {
pub fn is_byte(self) -> bool {
match self {
Mode::Byte | Mode::ByteStr | Mode::RawByteStr => true,
Mode::Char | Mode::Str | Mode::RawStr => false,
}
}
}

fn scan_escape(chars: &mut Chars<'_>, mode: Mode) -> Result<char, EscapeError> {
fn scan_escape(chars: &mut Chars<'_>, is_byte: bool) -> Result<char, EscapeError> {
// Previous character was '\\', unescape what follows.

let second_char = chars.next().ok_or(EscapeError::LoneSlash)?;

let res = match second_char {
let res = match chars.next().ok_or(EscapeError::LoneSlash)? {
'"' => '"',
'n' => '\n',
'r' => '\r',
Expand All @@ -181,7 +155,7 @@ fn scan_escape(chars: &mut Chars<'_>, mode: Mode) -> Result<char, EscapeError> {
let value = hi * 16 + lo;

// For a non-byte literal verify that it is within ASCII range.
if !mode.is_bytes() && !is_ascii(value) {
if !is_byte && !is_ascii(value) {
return Err(EscapeError::OutOfRangeHexEscape);
}
let value = value as u8;
Expand Down Expand Up @@ -217,7 +191,7 @@ fn scan_escape(chars: &mut Chars<'_>, mode: Mode) -> Result<char, EscapeError> {

// Incorrect syntax has higher priority for error reporting
// than unallowed value for a literal.
if mode.is_bytes() {
if is_byte {
return Err(EscapeError::UnicodeEscapeInByte);
}

Expand Down Expand Up @@ -249,23 +223,22 @@ fn scan_escape(chars: &mut Chars<'_>, mode: Mode) -> Result<char, EscapeError> {
}

#[inline]
fn ascii_check(first_char: char, mode: Mode) -> Result<char, EscapeError> {
if mode.is_bytes() && !first_char.is_ascii() {
fn ascii_check(c: char, is_byte: bool) -> Result<char, EscapeError> {
if is_byte && !c.is_ascii() {
// Byte literal can't be a non-ascii character.
Err(EscapeError::NonAsciiCharInByte)
} else {
Ok(first_char)
Ok(c)
}
}

fn unescape_char_or_byte(chars: &mut Chars<'_>, mode: Mode) -> Result<char, EscapeError> {
debug_assert!(mode == Mode::Char || mode == Mode::Byte);
let first_char = chars.next().ok_or(EscapeError::ZeroChars)?;
let res = match first_char {
'\\' => scan_escape(chars, mode),
fn unescape_char_or_byte(chars: &mut Chars<'_>, is_byte: bool) -> Result<char, EscapeError> {
let c = chars.next().ok_or(EscapeError::ZeroChars)?;
let res = match c {
'\\' => scan_escape(chars, is_byte),
'\n' | '\t' | '\'' => Err(EscapeError::EscapeOnlyChar),
'\r' => Err(EscapeError::BareCarriageReturn),
_ => ascii_check(first_char, mode),
_ => ascii_check(c, is_byte),
}?;
if chars.next().is_some() {
return Err(EscapeError::MoreThanOneChar);
Expand All @@ -275,20 +248,20 @@ fn unescape_char_or_byte(chars: &mut Chars<'_>, mode: Mode) -> Result<char, Esca

/// Takes the contents of a string literal (without quotes) and produces a
/// sequence of escaped characters or errors.
fn unescape_str_or_byte_str<F>(src: &str, mode: Mode, callback: &mut F)
fn unescape_str_or_byte_str<F>(src: &str, is_byte: bool, callback: &mut F)
where
F: FnMut(Range<usize>, Result<char, EscapeError>),
{
debug_assert!(mode == Mode::Str || mode == Mode::ByteStr);
let initial_len = src.len();
let mut chars = src.chars();
while let Some(first_char) = chars.next() {
let start = initial_len - chars.as_str().len() - first_char.len_utf8();

let unescaped_char = match first_char {
// The `start` and `end` computation here is complicated because
// `skip_ascii_whitespace` makes us skip over chars without counting
// them in the range computation.
while let Some(c) = chars.next() {
let start = src.len() - chars.as_str().len() - c.len_utf8();
let res = match c {
'\\' => {
let second_char = chars.clone().next();
match second_char {
match chars.clone().next() {
Some('\n') => {
// Rust language specification requires us to skip whitespaces
// if unescaped '\' character is followed by '\n'.
Expand All @@ -297,17 +270,17 @@ where
skip_ascii_whitespace(&mut chars, start, callback);
continue;
}
_ => scan_escape(&mut chars, mode),
_ => scan_escape(&mut chars, is_byte),
}
}
'\n' => Ok('\n'),
'\t' => Ok('\t'),
'"' => Err(EscapeError::EscapeOnlyChar),
'\r' => Err(EscapeError::BareCarriageReturn),
_ => ascii_check(first_char, mode),
_ => ascii_check(c, is_byte),
};
let end = initial_len - chars.as_str().len();
callback(start..end, unescaped_char);
let end = src.len() - chars.as_str().len();
callback(start..end, res);
}

fn skip_ascii_whitespace<F>(chars: &mut Chars<'_>, start: usize, callback: &mut F)
Expand Down Expand Up @@ -340,30 +313,29 @@ where
/// Takes the contents of a string literal (without quotes) and produces a
/// sequence of characters or errors.
/// NOTE: Raw strings do not perform any explicit character escaping, here we
/// only translate CRLF to LF and produce errors on bare CR.
fn unescape_raw_str_or_raw_byte_str<F>(literal_text: &str, mode: Mode, callback: &mut F)
/// only produce errors on bare CR.
fn unescape_raw_str_or_raw_byte_str<F>(src: &str, is_byte: bool, callback: &mut F)
where
F: FnMut(Range<usize>, Result<char, EscapeError>),
{
debug_assert!(mode == Mode::RawStr || mode == Mode::RawByteStr);
let initial_len = literal_text.len();

let mut chars = literal_text.chars();
while let Some(curr) = chars.next() {
let start = initial_len - chars.as_str().len() - curr.len_utf8();
let mut chars = src.chars();

let result = match curr {
// The `start` and `end` computation here matches the one in
// `unescape_str_or_byte_str` for consistency, even though this function
// doesn't have to worry about skipping any chars.
while let Some(c) = chars.next() {
let start = src.len() - chars.as_str().len() - c.len_utf8();
let res = match c {
'\r' => Err(EscapeError::BareCarriageReturnInRawString),
c if mode.is_bytes() && !c.is_ascii() => Err(EscapeError::NonAsciiCharInByteString),
c => Ok(c),
_ => ascii_check(c, is_byte),
};
let end = initial_len - chars.as_str().len();

callback(start..end, result);
let end = src.len() - chars.as_str().len();
callback(start..end, res);
}
}

fn byte_from_char(c: char) -> u8 {
#[inline]
pub fn byte_from_char(c: char) -> u8 {
let res = c as u32;
debug_assert!(res <= u8::MAX as u32, "guaranteed because of Mode::ByteStr");
res as u8
Expand Down
Loading