Skip to content

Commit

Permalink
Rollup merge of rust-lang#133184 - osiewicz:wasm-fix-infinite-loop-in…
Browse files Browse the repository at this point in the history
…-remove-dir-all, r=Noratrieb

wasi/fs: Improve stopping condition for <ReadDir as Iterator>::next

When upgrading [Zed](zed-industries/zed#19349) to Rust 1.82 I've encountered a test failure in our test suite. Specifically, one of our extension tests started hanging. I've tracked it down to a call to std::fs::remove_dir_all not returning when an extension is compiled with Rust 1.82 Our extension system uses WASM components, thus I've looked at the diff between 1.81 and 1.82 with respect to WASI and found 736f773

As it turned out, calling remove_dir_all from extension returned io::ErrorKind::NotFound in 1.81; the underlying issue is that the ReadDir iterator never actually terminates iteration, however since it loops around, with 1.81 we'd come across an entry second time and fail to remove it, since it would've been removed previously. With 1.82 and 736f773 it is no longer the case, thus we're seeing the hang. The tests do pass when everything but the extensions is compiled with 1.82.

This commit makes ReadDir::next adhere to readdir contract, namely it will no longer call readdir once the returned # of bytes is smaller than the size of a passed-in buffer. Previously we'd only terminate the loop if readdir returned 0.
  • Loading branch information
jieyouxu authored Dec 9, 2024
2 parents f6cb952 + f4ab982 commit 7219398
Showing 1 changed file with 105 additions and 70 deletions.
175 changes: 105 additions & 70 deletions library/std/src/sys/pal/wasi/fs.rs
Original file line number Diff line number Diff line change
Expand Up @@ -27,10 +27,27 @@ pub struct FileAttr {

pub struct ReadDir {
inner: Arc<ReadDirInner>,
cookie: Option<wasi::Dircookie>,
buf: Vec<u8>,
offset: usize,
cap: usize,
state: ReadDirState,
}

enum ReadDirState {
/// Fill `buf` with `buf.len()` bytes starting from `next_read_offset`.
FillBuffer {
next_read_offset: wasi::Dircookie,
buf: Vec<u8>,
},
ProcessEntry {
buf: Vec<u8>,
next_read_offset: Option<wasi::Dircookie>,
offset: usize,
},
/// There is no more data to get in [`Self::FillBuffer`]; keep returning
/// entries via ProcessEntry until `buf` is exhausted.
RunUntilExhaustion {
buf: Vec<u8>,
offset: usize,
},
Done,
}

struct ReadDirInner {
Expand Down Expand Up @@ -147,11 +164,8 @@ impl FileType {
impl ReadDir {
fn new(dir: File, root: PathBuf) -> ReadDir {
ReadDir {
cookie: Some(0),
buf: vec![0; 128],
offset: 0,
cap: 0,
inner: Arc::new(ReadDirInner { dir, root }),
state: ReadDirState::FillBuffer { next_read_offset: 0, buf: vec![0; 128] },
}
}
}
Expand All @@ -162,78 +176,99 @@ impl fmt::Debug for ReadDir {
}
}

impl core::iter::FusedIterator for ReadDir {}

impl Iterator for ReadDir {
type Item = io::Result<DirEntry>;

fn next(&mut self) -> Option<io::Result<DirEntry>> {
loop {
// If we've reached the capacity of our buffer then we need to read
// some more from the OS, otherwise we pick up at our old offset.
let offset = if self.offset == self.cap {
let cookie = self.cookie.take()?;
match self.inner.dir.fd.readdir(&mut self.buf, cookie) {
Ok(bytes) => self.cap = bytes,
Err(e) => return Some(Err(e)),
}
self.offset = 0;
self.cookie = Some(cookie);

// If we didn't actually read anything, this is in theory the
// end of the directory.
if self.cap == 0 {
self.cookie = None;
return None;
}

0
} else {
self.offset
};
let data = &self.buf[offset..self.cap];

// If we're not able to read a directory entry then that means it
// must have been truncated at the end of the buffer, so reset our
// offset so we can go back and reread into the buffer, picking up
// where we last left off.
let dirent_size = mem::size_of::<wasi::Dirent>();
if data.len() < dirent_size {
assert!(self.cookie.is_some());
assert!(self.buf.len() >= dirent_size);
self.offset = self.cap;
continue;
}
let (dirent, data) = data.split_at(dirent_size);
let dirent = unsafe { ptr::read_unaligned(dirent.as_ptr() as *const wasi::Dirent) };

// If the file name was truncated, then we need to reinvoke
// `readdir` so we truncate our buffer to start over and reread this
// descriptor. Note that if our offset is 0 that means the file name
// is massive and we need a bigger buffer.
if data.len() < dirent.d_namlen as usize {
if offset == 0 {
let amt_to_add = self.buf.capacity();
self.buf.extend(iter::repeat(0).take(amt_to_add));
match &mut self.state {
ReadDirState::FillBuffer { next_read_offset, ref mut buf } => {
let result = self.inner.dir.fd.readdir(buf, *next_read_offset);
match result {
Ok(read_bytes) => {
if read_bytes < buf.len() {
buf.truncate(read_bytes);
self.state =
ReadDirState::RunUntilExhaustion { buf: mem::take(buf), offset: 0 };
} else {
debug_assert_eq!(read_bytes, buf.len());
self.state = ReadDirState::ProcessEntry {
buf: mem::take(buf),
offset: 0,
next_read_offset: Some(*next_read_offset),
};
}
self.next()
}
Err(e) => {
self.state = ReadDirState::Done;
return Some(Err(e));
}
}
assert!(self.cookie.is_some());
self.offset = self.cap;
continue;
}
self.cookie = Some(dirent.d_next);
self.offset = offset + dirent_size + dirent.d_namlen as usize;
ReadDirState::ProcessEntry { ref mut buf, next_read_offset, offset } => {
let contents = &buf[*offset..];
const DIRENT_SIZE: usize = crate::mem::size_of::<wasi::Dirent>();
if contents.len() >= DIRENT_SIZE {
let (dirent, data) = contents.split_at(DIRENT_SIZE);
let dirent =
unsafe { ptr::read_unaligned(dirent.as_ptr() as *const wasi::Dirent) };
// If the file name was truncated, then we need to reinvoke
// `readdir` so we truncate our buffer to start over and reread this
// descriptor.
if data.len() < dirent.d_namlen as usize {
if buf.len() < dirent.d_namlen as usize + DIRENT_SIZE {
buf.resize(dirent.d_namlen as usize + DIRENT_SIZE, 0);
}
if let Some(next_read_offset) = *next_read_offset {
self.state =
ReadDirState::FillBuffer { next_read_offset, buf: mem::take(buf) };
} else {
self.state = ReadDirState::Done;
}

return self.next();
}
next_read_offset.as_mut().map(|cookie| {
*cookie = dirent.d_next;
});
*offset = *offset + DIRENT_SIZE + dirent.d_namlen as usize;

let name = &data[..(dirent.d_namlen as usize)];
let name = &data[..(dirent.d_namlen as usize)];

// These names are skipped on all other platforms, so let's skip
// them here too
if name == b"." || name == b".." {
return self.next();
}

// These names are skipped on all other platforms, so let's skip
// them here too
if name == b"." || name == b".." {
continue;
return Some(Ok(DirEntry {
meta: dirent,
name: name.to_vec(),
inner: self.inner.clone(),
}));
} else if let Some(next_read_offset) = *next_read_offset {
self.state = ReadDirState::FillBuffer { next_read_offset, buf: mem::take(buf) };
} else {
self.state = ReadDirState::Done;
}
self.next()
}
ReadDirState::RunUntilExhaustion { buf, offset } => {
if *offset >= buf.len() {
self.state = ReadDirState::Done;
} else {
self.state = ReadDirState::ProcessEntry {
buf: mem::take(buf),
offset: *offset,
next_read_offset: None,
};
}

return Some(Ok(DirEntry {
meta: dirent,
name: name.to_vec(),
inner: self.inner.clone(),
}));
self.next()
}
ReadDirState::Done => None,
}
}
}
Expand Down

0 comments on commit 7219398

Please sign in to comment.