Skip to content

Commit

Permalink
Add async BufReader
Browse files Browse the repository at this point in the history
  • Loading branch information
taiki-e committed May 3, 2019
1 parent aaa1efc commit 2b54e87
Show file tree
Hide file tree
Showing 4 changed files with 590 additions and 1 deletion.
258 changes: 258 additions & 0 deletions futures-util/src/io/buf_reader.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,258 @@
use futures_core::task::{Context, Poll};
use futures_io::{AsyncBufRead, AsyncRead, AsyncSeek, Initializer, IoSliceMut, SeekFrom};
use pin_utils::{unsafe_pinned, unsafe_unpinned};
use std::io::{self, Read};
use std::pin::Pin;
use std::{cmp, fmt};
use super::DEFAULT_BUF_SIZE;

/// The `BufReader` struct adds buffering to any reader.
///
/// It can be excessively inefficient to work directly with a [`AsyncRead`]
/// instance. A `BufReader` performs large, infrequent reads on the underlying
/// [`AsyncRead`] and maintains an in-memory buffer of the results.
///
/// `BufReader` can improve the speed of programs that make *small* and
/// *repeated* read calls to the same file or network socket. It does not
/// help when reading very large amounts at once, or reading just one or a few
/// times. It also provides no advantage when reading from a source that is
/// already in memory, like a `Vec<u8>`.
///
/// When the `BufReader` is dropped, the contents of its buffer will be
/// discarded. Creating multiple instances of a `BufReader` on the same
/// stream can cause data loss.
///
/// [`AsyncRead`]: futures_io::AsyncRead
///
// TODO: Examples
pub struct BufReader<R> {
inner: R,
buf: Box<[u8]>,
pos: usize,
cap: usize,
}

impl<R: AsyncRead> BufReader<R> {
unsafe_pinned!(inner: R);
unsafe_pinned!(buf: Box<[u8]>);
unsafe_unpinned!(pos: usize);
unsafe_unpinned!(cap: usize);

/// Creates a new `BufReader` with a default buffer capacity. The default is currently 8 KB,
/// but may change in the future.
pub fn new(inner: R) -> Self {
Self::with_capacity(DEFAULT_BUF_SIZE, inner)
}

/// Creates a new `BufReader` with the specified buffer capacity.
pub fn with_capacity(capacity: usize, inner: R) -> Self {
unsafe {
let mut buffer = Vec::with_capacity(capacity);
buffer.set_len(capacity);
inner.initializer().initialize(&mut buffer);
Self {
inner,
buf: buffer.into_boxed_slice(),
pos: 0,
cap: 0,
}
}
}

/// Gets a reference to the underlying reader.
///
/// It is inadvisable to directly read from the underlying reader.
pub fn get_ref(&self) -> &R {
&self.inner
}

/// Gets a mutable reference to the underlying reader.
///
/// It is inadvisable to directly read from the underlying reader.
pub fn get_mut(&mut self) -> &mut R {
&mut self.inner
}

/// Gets a pinned mutable reference to the underlying reader.
///
/// It is inadvisable to directly read from the underlying reader.
pub fn get_pin_mut<'a>(self: Pin<&'a mut Self>) -> Pin<&'a mut R> {
self.inner()
}

/// Consumes this `BufWriter`, returning the underlying reader.
///
/// Note that any leftover data in the internal buffer is lost.
pub fn into_inner(self) -> R {
self.inner
}

/// Returns a reference to the internally buffered data.
///
/// Unlike `fill_buf`, this will not attempt to fill the buffer if it is empty.
pub fn buffer(&self) -> &[u8] {
&self.buf[self.pos..self.cap]
}

/// Invalidates all data in the internal buffer.
#[inline]
fn discard_buffer(mut self: Pin<&mut Self>) {
*self.as_mut().pos() = 0;
*self.cap() = 0;
}
}

impl<R: AsyncRead + AsyncSeek> BufReader<R> {
// https://github.com/rust-lang/rust/issues/31100
/// Seeks relative to the current position. If the new position lies within the buffer,
/// the buffer will not be flushed, allowing for more efficient seeks.
/// This method does not return the location of the underlying reader, so the caller
/// must track this information themselves if it is required.
pub fn poll_seek_relative(
mut self: Pin<&mut Self>,
cx: &mut Context<'_>,
offset: i64,
) -> Poll<io::Result<()>> {
let pos = self.pos as u64;
if offset < 0 {
if let Some(new_pos) = pos.checked_sub((-offset) as u64) {
*self.as_mut().pos() = new_pos as usize;
return Poll::Ready(Ok(()));
}
} else if let Some(new_pos) = pos.checked_add(offset as u64) {
if new_pos <= self.cap as u64 {
*self.as_mut().pos() = new_pos as usize;
return Poll::Ready(Ok(()));
}
}
self.poll_seek(cx, SeekFrom::Current(offset)).map(|res| res.map(|_| ()))
}
}

impl<R: AsyncRead> AsyncRead for BufReader<R> {
fn poll_read(
mut self: Pin<&mut Self>,
cx: &mut Context<'_>,
buf: &mut [u8],
) -> Poll<io::Result<usize>> {
// If we don't have any buffered data and we're doing a massive read
// (larger than our internal buffer), bypass our internal buffer
// entirely.
if self.pos == self.cap && buf.len() >= self.buf.len() {
let res = ready!(self.as_mut().inner().poll_read(cx, buf));
self.discard_buffer();
return Poll::Ready(res);
}
let mut rem = ready!(self.as_mut().poll_fill_buf(cx))?;
let nread = rem.read(buf)?;
self.consume(nread);
Poll::Ready(Ok(nread))
}

fn poll_read_vectored(
mut self: Pin<&mut Self>,
cx: &mut Context<'_>,
bufs: &mut [IoSliceMut<'_>],
) -> Poll<io::Result<usize>> {
let total_len = bufs.iter().map(|b| b.len()).sum::<usize>();
if self.pos == self.cap && total_len >= self.buf.len() {
let res = ready!(self.as_mut().inner().poll_read_vectored(cx, bufs));
self.discard_buffer();
return Poll::Ready(res);
}
let mut rem = ready!(self.as_mut().poll_fill_buf(cx))?;
let nread = rem.read_vectored(bufs)?;
self.consume(nread);
Poll::Ready(Ok(nread))
}

// we can't skip unconditionally because of the large buffer case in read.
unsafe fn initializer(&self) -> Initializer {
self.inner.initializer()
}
}

impl<R: AsyncRead> AsyncBufRead for BufReader<R> {
fn poll_fill_buf<'a>(
self: Pin<&'a mut Self>,
cx: &mut Context<'_>,
) -> Poll<io::Result<&'a [u8]>> {
let Self { inner, buf, cap, pos } = unsafe { Pin::get_unchecked_mut(self) };
let mut inner = unsafe { Pin::new_unchecked(inner) };

// If we've reached the end of our internal buffer then we need to fetch
// some more data from the underlying reader.
// Branch using `>=` instead of the more correct `==`
// to tell the compiler that the pos..cap slice is always valid.
if *pos >= *cap {
debug_assert!(*pos == *cap);
*cap = ready!(inner.as_mut().poll_read(cx, buf))?;
*pos = 0;
}
Poll::Ready(Ok(&buf[*pos..*cap]))
}

fn consume(mut self: Pin<&mut Self>, amt: usize) {
*self.as_mut().pos() = cmp::min(self.pos + amt, self.cap);
}
}

impl<R: AsyncRead + fmt::Debug> fmt::Debug for BufReader<R> {
fn fmt(&self, fmt: &mut fmt::Formatter<'_>) -> fmt::Result {
fmt.debug_struct("BufReader")
.field("reader", &self.inner)
.field("buffer", &format_args!("{}/{}", self.cap - self.pos, self.buf.len()))
.finish()
}
}

impl<R: AsyncRead + AsyncSeek> AsyncSeek for BufReader<R> {
/// Seek to an offset, in bytes, in the underlying reader.
///
/// The position used for seeking with `SeekFrom::Current(_)` is the
/// position the underlying reader would be at if the `BufReader` had no
/// internal buffer.
///
/// Seeking always discards the internal buffer, even if the seek position
/// would otherwise fall within it. This guarantees that calling
/// `.into_inner()` immediately after a seek yields the underlying reader
/// at the same position.
///
/// To seek without discarding the internal buffer, use
/// [`BufReader::poll_seek_relative`](BufReader::poll_seek_relative).
///
/// See [`AsyncSeek`](futures_io::AsyncSeek) for more details.
///
/// Note: In the edge case where you're seeking with `SeekFrom::Current(n)`
/// where `n` minus the internal buffer length overflows an `i64`, two
/// seeks will be performed instead of one. If the second seek returns
/// `Err`, the underlying reader will be left at the same position it would
/// have if you called `seek` with `SeekFrom::Current(0)`.
fn poll_seek(
mut self: Pin<&mut Self>,
cx: &mut Context<'_>,
pos: SeekFrom,
) -> Poll<io::Result<u64>> {
let result: u64;
if let SeekFrom::Current(n) = pos {
let remainder = (self.cap - self.pos) as i64;
// it should be safe to assume that remainder fits within an i64 as the alternative
// means we managed to allocate 8 exbibytes and that's absurd.
// But it's not out of the realm of possibility for some weird underlying reader to
// support seeking by i64::min_value() so we need to handle underflow when subtracting
// remainder.
if let Some(offset) = n.checked_sub(remainder) {
result = ready!(self.as_mut().inner().poll_seek(cx, SeekFrom::Current(offset)))?;
} else {
// seek backwards by our remainder, and then by the offset
ready!(self.as_mut().inner().poll_seek(cx, SeekFrom::Current(-remainder)))?;
result = ready!(self.as_mut().inner().poll_seek(cx, SeekFrom::Current(n)))?;
}
} else {
// Seeking with Start/End doesn't care about our buffer length.
result = ready!(self.as_mut().inner().poll_seek(cx, pos))?;
}
self.discard_buffer();
Poll::Ready(Ok(result))
}
}
10 changes: 10 additions & 0 deletions futures-util/src/io/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -11,9 +11,19 @@ pub use futures_io::{

#[cfg(feature = "io-compat")] use crate::compat::Compat;

// used by `BufReader` and `BufWriter`
// https://github.com/rust-lang/rust/blob/master/src/libstd/sys_common/io.rs#L1
const DEFAULT_BUF_SIZE: usize = 8 * 1024;

mod allow_std;
pub use self::allow_std::AllowStdIo;

mod buf_reader;
pub use self::buf_reader::BufReader;

// mod buf_writer;
// pub use self::buf_writer::BufWriter;

mod copy_into;
pub use self::copy_into::CopyInto;

Expand Down
2 changes: 1 addition & 1 deletion futures/src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -280,7 +280,7 @@ pub mod io {
};

pub use futures_util::io::{
AsyncReadExt, AsyncWriteExt, AsyncSeekExt, AsyncBufReadExt, AllowStdIo,
AsyncReadExt, AsyncWriteExt, AsyncSeekExt, AsyncBufReadExt, AllowStdIo, BufReader,
Close, CopyInto, Flush, Read, ReadExact, ReadHalf, ReadToEnd, ReadUntil,
Seek, Window, WriteAll, WriteHalf,
};
Expand Down
Loading

0 comments on commit 2b54e87

Please sign in to comment.