From f74fe8bf4ca773e416d4da3a3bf37045b06ea3de Mon Sep 17 00:00:00 2001 From: Chris Denton Date: Fri, 21 Apr 2023 20:53:21 +0100 Subject: [PATCH] Limit read size in `File::read_to_end` loop This works around performance issues on Windows by limiting the size of reads when the expected size is known. --- library/std/src/fs.rs | 40 ++++++++++++++++++++----------------- library/std/src/io/mod.rs | 24 +++++++++++++++++----- library/std/src/io/tests.rs | 2 +- library/std/src/lib.rs | 1 + 4 files changed, 43 insertions(+), 24 deletions(-) diff --git a/library/std/src/fs.rs b/library/std/src/fs.rs index c550378e7d6b7..55580b23a6249 100644 --- a/library/std/src/fs.rs +++ b/library/std/src/fs.rs @@ -249,9 +249,9 @@ pub struct DirBuilder { pub fn read>(path: P) -> io::Result> { fn inner(path: &Path) -> io::Result> { let mut file = File::open(path)?; - let size = file.metadata().map(|m| m.len()).unwrap_or(0); - let mut bytes = Vec::with_capacity(size as usize); - io::default_read_to_end(&mut file, &mut bytes)?; + let size = file.metadata().map(|m| m.len() as usize).ok(); + let mut bytes = Vec::with_capacity(size.unwrap_or(0)); + io::default_read_to_end(&mut file, &mut bytes, size)?; Ok(bytes) } inner(path.as_ref()) @@ -289,9 +289,9 @@ pub fn read>(path: P) -> io::Result> { pub fn read_to_string>(path: P) -> io::Result { fn inner(path: &Path) -> io::Result { let mut file = File::open(path)?; - let size = file.metadata().map(|m| m.len()).unwrap_or(0); - let mut string = String::with_capacity(size as usize); - io::default_read_to_string(&mut file, &mut string)?; + let size = file.metadata().map(|m| m.len() as usize).ok(); + let mut string = String::with_capacity(size.unwrap_or(0)); + io::default_read_to_string(&mut file, &mut string, size)?; Ok(string) } inner(path.as_ref()) @@ -732,12 +732,12 @@ impl fmt::Debug for File { } /// Indicates how much extra capacity is needed to read the rest of the file. 
-fn buffer_capacity_required(mut file: &File) -> usize { - let size = file.metadata().map(|m| m.len()).unwrap_or(0); - let pos = file.stream_position().unwrap_or(0); +fn buffer_capacity_required(mut file: &File) -> Option { + let size = file.metadata().map(|m| m.len()).ok()?; + let pos = file.stream_position().ok()?; // Don't worry about `usize` overflow because reading will fail regardless // in that case. - size.saturating_sub(pos) as usize + Some(size.saturating_sub(pos) as usize) } #[stable(feature = "rust1", since = "1.0.0")] @@ -761,14 +761,16 @@ impl Read for File { // Reserves space in the buffer based on the file size when available. fn read_to_end(&mut self, buf: &mut Vec) -> io::Result { - buf.reserve(buffer_capacity_required(self)); - io::default_read_to_end(self, buf) + let size = buffer_capacity_required(self); + buf.reserve(size.unwrap_or(0)); + io::default_read_to_end(self, buf, size) } // Reserves space in the buffer based on the file size when available. fn read_to_string(&mut self, buf: &mut String) -> io::Result { - buf.reserve(buffer_capacity_required(self)); - io::default_read_to_string(self, buf) + let size = buffer_capacity_required(self); + buf.reserve(size.unwrap_or(0)); + io::default_read_to_string(self, buf, size) } } #[stable(feature = "rust1", since = "1.0.0")] @@ -817,14 +819,16 @@ impl Read for &File { // Reserves space in the buffer based on the file size when available. fn read_to_end(&mut self, buf: &mut Vec) -> io::Result { - buf.reserve(buffer_capacity_required(self)); - io::default_read_to_end(self, buf) + let size = buffer_capacity_required(self); + buf.reserve(size.unwrap_or(0)); + io::default_read_to_end(self, buf, size) } // Reserves space in the buffer based on the file size when available. 
fn read_to_string(&mut self, buf: &mut String) -> io::Result { - buf.reserve(buffer_capacity_required(self)); - io::default_read_to_string(self, buf) + let size = buffer_capacity_required(self); + buf.reserve(size.unwrap_or(0)); + io::default_read_to_string(self, buf, size) } } #[stable(feature = "rust1", since = "1.0.0")] diff --git a/library/std/src/io/mod.rs b/library/std/src/io/mod.rs index 020c723925aeb..b3b5803bf59c3 100644 --- a/library/std/src/io/mod.rs +++ b/library/std/src/io/mod.rs @@ -357,9 +357,17 @@ where // of data to return. Simply tacking on an extra DEFAULT_BUF_SIZE space every // time is 4,500 times (!) slower than a default reservation size of 32 if the // reader has a very small amount of data to return. -pub(crate) fn default_read_to_end(r: &mut R, buf: &mut Vec) -> Result { +pub(crate) fn default_read_to_end( + r: &mut R, + buf: &mut Vec, + size_hint: Option, +) -> Result { let start_len = buf.len(); let start_cap = buf.capacity(); + // Optionally limit the maximum bytes read on each iteration. + // This adds an arbitrary fiddle factor to allow for more data than we expect. 
+ let max_read_size = + size_hint.and_then(|s| s.checked_add(1024)?.checked_next_multiple_of(DEFAULT_BUF_SIZE)); let mut initialized = 0; // Extra initialized bytes from previous loop iteration loop { @@ -367,7 +375,12 @@ pub(crate) fn default_read_to_end(r: &mut R, buf: &mut Vec buf.reserve(32); // buf is full, need more space } - let mut read_buf: BorrowedBuf<'_> = buf.spare_capacity_mut().into(); + let mut spare = buf.spare_capacity_mut(); + if let Some(size) = max_read_size { + let len = cmp::min(spare.len(), size); + spare = &mut spare[..len] + } + let mut read_buf: BorrowedBuf<'_> = spare.into(); // SAFETY: These bytes were initialized but not filled in the previous loop unsafe { @@ -419,6 +432,7 @@ pub(crate) fn default_read_to_end(r: &mut R, buf: &mut Vec pub(crate) fn default_read_to_string( r: &mut R, buf: &mut String, + size_hint: Option, ) -> Result { // Note that we do *not* call `r.read_to_end()` here. We are passing // `&mut Vec` (the raw contents of `buf`) into the `read_to_end` @@ -429,7 +443,7 @@ pub(crate) fn default_read_to_string( // To prevent extraneously checking the UTF-8-ness of the entire buffer // we pass it to our hardcoded `default_read_to_end` implementation which // we know is guaranteed to only read data into the end of the buffer. - unsafe { append_to_string(buf, |b| default_read_to_end(r, b)) } + unsafe { append_to_string(buf, |b| default_read_to_end(r, b, size_hint)) } } pub(crate) fn default_read_vectored(read: F, bufs: &mut [IoSliceMut<'_>]) -> Result @@ -709,7 +723,7 @@ pub trait Read { /// [`std::fs::read`]: crate::fs::read #[stable(feature = "rust1", since = "1.0.0")] fn read_to_end(&mut self, buf: &mut Vec) -> Result { - default_read_to_end(self, buf) + default_read_to_end(self, buf, None) } /// Read all bytes until EOF in this source, appending them to `buf`. 
@@ -752,7 +766,7 @@ pub trait Read { /// [`std::fs::read_to_string`]: crate::fs::read_to_string #[stable(feature = "rust1", since = "1.0.0")] fn read_to_string(&mut self, buf: &mut String) -> Result { - default_read_to_string(self, buf) + default_read_to_string(self, buf, None) } /// Read the exact number of bytes required to fill `buf`. diff --git a/library/std/src/io/tests.rs b/library/std/src/io/tests.rs index f4a886d889a99..6d30f5e6c6c8d 100644 --- a/library/std/src/io/tests.rs +++ b/library/std/src/io/tests.rs @@ -314,7 +314,7 @@ fn bench_read_to_end(b: &mut test::Bencher) { b.iter(|| { let mut lr = repeat(1).take(10000000); let mut vec = Vec::with_capacity(1024); - super::default_read_to_end(&mut lr, &mut vec) + super::default_read_to_end(&mut lr, &mut vec, None) }); } diff --git a/library/std/src/lib.rs b/library/std/src/lib.rs index 98fcc76aa98f6..318a46d1b637e 100644 --- a/library/std/src/lib.rs +++ b/library/std/src/lib.rs @@ -289,6 +289,7 @@ #![feature(float_next_up_down)] #![feature(hasher_prefixfree_extras)] #![feature(hashmap_internals)] +#![feature(int_roundings)] #![feature(ip)] #![feature(ip_in_core)] #![feature(maybe_uninit_slice)]