Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

library: core::str::lines: Fix handling of trailing bare CR #91191

Closed
wants to merge 5 commits into from
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions library/core/src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -174,6 +174,7 @@
#![feature(intra_doc_pointers)]
#![feature(intrinsics)]
#![feature(lang_items)]
#![feature(let_else)]
#![feature(link_llvm_intrinsics)]
#![feature(llvm_asm)]
#![feature(min_specialization)]
Expand Down
4 changes: 2 additions & 2 deletions library/core/src/str/iter.rs
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,7 @@ use super::from_utf8_unchecked;
use super::pattern::Pattern;
use super::pattern::{DoubleEndedSearcher, ReverseSearcher, Searcher};
use super::validations::{next_code_point, next_code_point_reverse, utf8_is_cont_byte};
use super::LinesAnyMap;
use super::LinesMap;
use super::{BytesIsNotEmpty, UnsafeBytesToStr};
use super::{CharEscapeDebugContinue, CharEscapeDefault, CharEscapeUnicode};
use super::{IsAsciiWhitespace, IsNotEmpty, IsWhitespace};
Expand Down Expand Up @@ -1095,7 +1095,7 @@ generate_pattern_iterators! {
#[stable(feature = "rust1", since = "1.0.0")]
#[must_use = "iterators are lazy and do nothing unless consumed"]
#[derive(Clone, Debug)]
pub struct Lines<'a>(pub(super) Map<SplitTerminator<'a, char>, LinesAnyMap>);
pub struct Lines<'a>(pub(super) Map<SplitInclusive<'a, char>, LinesMap>);

#[stable(feature = "rust1", since = "1.0.0")]
impl<'a> Iterator for Lines<'a> {
Expand Down
67 changes: 62 additions & 5 deletions library/core/src/str/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -899,10 +899,27 @@ impl str {
///
/// assert_eq!(None, lines.next());
/// ```
///
/// Handling of some edge cases:
///
/// ```
/// fn assert_splits_into(input: &str, expected: &[&str]) {
/// assert_eq!( input.lines().collect::<Vec<_>>(), expected );
/// }
///
// Note: there is another copy of this set of corner cases next to `std::io::BufRead::lines()`.
// The two functions should behave the same way; consider editing those other doctests too.
/// assert_splits_into("", &[]);
/// assert_splits_into("\n", &[""]);
/// assert_splits_into("\n2nd", &["", "2nd"]);
/// assert_splits_into("\r\n", &[""]);
/// assert_splits_into("bare\r", &["bare\r"]);
/// assert_splits_into("bare\rcr", &["bare\rcr"]);
/// ```
#[stable(feature = "rust1", since = "1.0.0")]
#[inline]
pub fn lines(&self) -> Lines<'_> {
Lines(self.split_terminator('\n').map(LinesAnyMap))
Lines(self.split_inclusive('\n').map(LinesMap))
}

/// An iterator over the lines of a string.
Expand Down Expand Up @@ -1833,6 +1850,48 @@ impl str {
self.trim_end_matches(|c: char| c.is_whitespace())
}

/// Returns a string slice with at most one trailing newline removed.
///
/// A 'newline' here is exactly a line feed (`0xA`), optionally preceded
/// by a single carriage return (`0xD`): that is, `"\n"` or `"\r\n"`.
/// A trailing carriage return that is not followed by a line feed is
/// left in place. (This is the same definition as used by
/// [`str::lines`] and `std::io::BufRead::lines`.)
//
// The mention of `std::io::BufRead::lines` above is deliberately not a link.
// A target such as
// [`std::io::BufRead::lines`]: ../std/io/trait.BufRead.html#method.lines
// resolves in `core`, but produces a broken link error in `std`, where
// this text is also rendered because of `String`'s `Deref`.
///
/// # Examples
///
/// ```
/// #![feature(trim_newline)]
/// use std::fmt::Write as _;
///
/// assert_eq!("Text", "Text".trim_newline());
/// assert_eq!("Text", "Text\n".trim_newline());
/// assert_eq!("Text", "Text\r\n".trim_newline());
/// assert_eq!("Text\r", "Text\r".trim_newline());
/// assert_eq!("Text\n", "Text\n\n".trim_newline());
/// assert_eq!("Text\n\r", "Text\n\r".trim_newline()); // LF CR is not a valid newline
///
/// let mut s = String::new();
/// writeln!(s, " Hi! ").unwrap();
/// assert_eq!(" Hi! ", s.trim_newline());
/// assert_eq!(" Hi! ", s.trim_newline().trim_newline());
/// ```
#[inline]
#[must_use = "this returns the trimmed string as a new slice, \
without modifying the original"]
#[unstable(feature = "trim_newline", issue = "none")]
pub fn trim_newline(&self) -> &str {
    match self.strip_suffix('\n') {
        // No terminating LF: nothing is trimmed, even if the string ends in CR.
        None => self,
        // The LF is gone; also drop the CR of a CRLF pair when there is one.
        Some(head) => head.strip_suffix('\r').unwrap_or(head),
    }
}

/// Returns a string slice with leading whitespace removed.
///
/// 'Whitespace' is defined according to the terms of the Unicode Derived
Expand Down Expand Up @@ -2499,10 +2558,8 @@ impl Default for &mut str {
impl_fn_for_zst! {
/// A nameable, cloneable fn type
#[derive(Clone)]
struct LinesAnyMap impl<'a> Fn = |line: &'a str| -> &'a str {
let l = line.len();
if l > 0 && line.as_bytes()[l - 1] == b'\r' { &line[0 .. l - 1] }
else { line }
struct LinesMap impl<'a> Fn = |line: &'a str| -> &'a str {
line.trim_newline()
};

#[derive(Clone)]
Expand Down
20 changes: 20 additions & 0 deletions library/std/src/io/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -2281,6 +2281,26 @@ pub trait BufRead: Read {
/// assert_eq!(lines_iter.next(), None);
/// ```
///
/// Handling of some edge cases:
///
/// ```
/// use std::io::{BufRead, Cursor};
///
/// fn assert_splits_into(input: &str, expected: &[&str]) {
/// let got = Cursor::new(input).lines().collect::<Result<Vec<_>,_>>().unwrap();
/// assert_eq!(got, expected);
/// }
///
// Note: there is another copy of this set of corner cases, next to `core::str::lines()`.
// The two functions should behave the same way; consider editing those other doctests too.
/// assert_splits_into("", &[]);
/// assert_splits_into("\n", &[""]);
/// assert_splits_into("\n2nd", &["", "2nd"]);
/// assert_splits_into("\r\n", &[""]);
/// assert_splits_into("bare\r", &["bare\r"]);
/// assert_splits_into("bare\rcr", &["bare\rcr"]);
/// ```
///
/// # Errors
///
/// Each line of the iterator has the same error semantics as [`BufRead::read_line`].
Expand Down