From 713e1877961e35d65deb7a22573e38c56e353df3 Mon Sep 17 00:00:00 2001 From: Crypt Keeper <64215+codefromthecrypt@users.noreply.github.com> Date: Sun, 15 Jan 2023 17:30:45 -0800 Subject: [PATCH] fs: extracts syscallfs.ReaderAtOffset for WASI and gojs (#1037) This extracts a utility `syscallfs.ReaderAtOffset()` to allow WASI and gojs to re-use the same logic to implement `syscall.Pread`. What's different than before is that if WASI passes multiple iovecs an emulated `ReaderAt` will seek to the read position on each call to `Read` vs once per loop. This was a design decision to keep the call sites compatible between files that implement ReaderAt and those that emulate them with Seeker (e.g. avoid the need for a read-scoped closer/ defer function). The main use case for emulation is `embed.file`, whose seek function is cheap, so there's little performance impact to this. Signed-off-by: Adrian Cole --- RATIONALE.md | 10 +- imports/wasi_snapshot_preview1/fs.go | 47 ++----- imports/wasi_snapshot_preview1/fs_test.go | 43 +++--- internal/gojs/fs.go | 14 +- internal/gojs/testdata/fs/main.go | 25 ++++ internal/syscallfs/adapter_test.go | 32 ++++- internal/syscallfs/syscallfs.go | 83 ++++++++++- internal/syscallfs/syscallfs_bench_test.go | 63 +++++++++ internal/syscallfs/syscallfs_test.go | 154 +++++++++++++++++++++ internal/syscallfs/testdata/empty.txt | 0 internal/syscallfs/testdata/wazero.txt | 1 + 11 files changed, 399 insertions(+), 73 deletions(-) create mode 100644 internal/syscallfs/syscallfs_bench_test.go create mode 100644 internal/syscallfs/testdata/empty.txt create mode 100644 internal/syscallfs/testdata/wazero.txt diff --git a/RATIONALE.md b/RATIONALE.md index 4eccd6a6b4..b1a35ff21b 100644 --- a/RATIONALE.md +++ b/RATIONALE.md @@ -610,15 +610,19 @@ there are no strong incentives to complicate the logic. `ReadAt` is the Go equivalent to `pread`: it does not affect, and is not affected by, the underlying file offset. Unfortunately, `io.ReaderAt` is not -implemented by all `fs.File`, notably `embed.file`. +implemented by all `fs.File`. For example, as of Go 1.19, `embed.openFile` does +not. The initial implementation of `fd_pread` instead used `Seek`. To avoid a -regression we fallback to `io.Seeker` when `io.ReaderAt` is not supported. +regression, we fall back to `io.Seeker` when `io.ReaderAt` is not supported. This requires obtaining the initial file offset, seeking to the intended read -offset, and reseting the file offset the initial state. If this final seek +offset, and resetting the file offset the initial state. If this final seek fails, the file offset is left in an undefined state. This is not thread-safe. +While seeking per read seems expensive, the common case of `embed.openFile` is +only accessing a single int64 field, which is cheap. + ### Pre-opened files WASI includes `fd_prestat_get` and `fd_prestat_dir_name` functions used to diff --git a/imports/wasi_snapshot_preview1/fs.go b/imports/wasi_snapshot_preview1/fs.go index 5fe8638587..2504edd2c8 100644 --- a/imports/wasi_snapshot_preview1/fs.go +++ b/imports/wasi_snapshot_preview1/fs.go @@ -502,51 +502,26 @@ func fdReadOrPread(mod api.Module, params []uint64, isPread bool) Errno { fsc := mod.(*wasm.CallContext).Sys.FS() fd := uint32(params[0]) + + r, ok := fsc.LookupFile(fd) + if !ok { + return ErrnoBadf + } + + var reader io.Reader = r.File + iovs := uint32(params[1]) iovsCount := uint32(params[2]) - var offset int64 var resultNread uint32 if isPread { - offset = int64(params[3]) + offset := int64(params[3]) + reader = syscallfs.ReaderAtOffset(r.File, offset) resultNread = uint32(params[4]) } else { resultNread = uint32(params[3]) } - r, ok := fsc.LookupFile(fd) - if !ok { - return ErrnoBadf - } - - read := r.File.Read - if isPread { - if ra, ok := r.File.(io.ReaderAt); ok { - // ReadAt is the Go equivalent to pread. - read = func(p []byte) (int, error) { - n, err := ra.ReadAt(p, offset) - offset += int64(n) - return n, err - } - } else if s, ok := r.File.(io.Seeker); ok { - // Unfortunately, it is often not supported. - // See /RATIONALE.md "fd_pread: io.Seeker fallback when io.ReaderAt is not supported" - initialOffset, err := s.Seek(0, io.SeekCurrent) - if err != nil { - return ErrnoInval - } - defer func() { _, _ = s.Seek(initialOffset, io.SeekStart) }() - if offset != initialOffset { - _, err := s.Seek(offset, io.SeekStart) - if err != nil { - return ErrnoInval - } - } - } else { - return ErrnoInval - } - } - var nread uint32 iovsStop := iovsCount << 3 // iovsCount * 8 iovsBuf, ok := mem.Read(iovs, iovsStop) @@ -563,7 +538,7 @@ func fdReadOrPread(mod api.Module, params []uint64, isPread bool) Errno { return ErrnoFault } - n, err := read(b) + n, err := reader.Read(b) nread += uint32(n) shouldContinue, errno := fdRead_shouldContinueRead(uint32(n), l, err) diff --git a/imports/wasi_snapshot_preview1/fs_test.go b/imports/wasi_snapshot_preview1/fs_test.go index cf29317024..15bf411f6c 100644 --- a/imports/wasi_snapshot_preview1/fs_test.go +++ b/imports/wasi_snapshot_preview1/fs_test.go @@ -604,18 +604,8 @@ func Test_fdPread_Errors(t *testing.T) { memory: []byte{'?', '?', '?', '?'}, // pass result.nread validation expectedErrno: ErrnoBadf, expectedLog: ` -==> wasi_snapshot_preview1.fd_pread(fd=42,iovs=65532,iovs_len=65532,offset=0) +==> wasi_snapshot_preview1.fd_pread(fd=42,iovs=65532,iovs_len=0,offset=0) <== (nread=,errno=EBADF) -`, - }, - { - name: "seek past file", - fd: fd, - offset: int64(len(contents) + 1), - expectedErrno: ErrnoFault, - expectedLog: ` -==> wasi_snapshot_preview1.fd_pread(fd=4,iovs=65536,iovs_len=65536,offset=7) -<== (nread=,errno=EFAULT) `, }, { @@ -625,7 +615,7 @@ func Test_fdPread_Errors(t *testing.T) { memory: []byte{'?'}, expectedErrno: ErrnoFault, expectedLog: ` -==> wasi_snapshot_preview1.fd_pread(fd=4,iovs=65536,iovs_len=65535,offset=0) +==> wasi_snapshot_preview1.fd_pread(fd=4,iovs=65536,iovs_len=0,offset=0) <== (nread=,errno=EFAULT) `, }, @@ -639,7 +629,7 @@ func Test_fdPread_Errors(t *testing.T) { }, expectedErrno: ErrnoFault, expectedLog: ` -==> wasi_snapshot_preview1.fd_pread(fd=4,iovs=65532,iovs_len=65532,offset=0) +==> wasi_snapshot_preview1.fd_pread(fd=4,iovs=65532,iovs_len=1,offset=0) <== (nread=,errno=EFAULT) `, }, @@ -654,7 +644,7 @@ func Test_fdPread_Errors(t *testing.T) { }, expectedErrno: ErrnoFault, expectedLog: ` -==> wasi_snapshot_preview1.fd_pread(fd=4,iovs=65528,iovs_len=65528,offset=0) +==> wasi_snapshot_preview1.fd_pread(fd=4,iovs=65528,iovs_len=1,offset=0) <== (nread=,errno=EFAULT) `, }, @@ -670,7 +660,7 @@ func Test_fdPread_Errors(t *testing.T) { }, expectedErrno: ErrnoFault, expectedLog: ` -==> wasi_snapshot_preview1.fd_pread(fd=4,iovs=65527,iovs_len=65527,offset=0) +==> wasi_snapshot_preview1.fd_pread(fd=4,iovs=65527,iovs_len=1,offset=0) <== (nread=,errno=EFAULT) `, }, @@ -687,8 +677,27 @@ func Test_fdPread_Errors(t *testing.T) { }, expectedErrno: ErrnoFault, expectedLog: ` -==> wasi_snapshot_preview1.fd_pread(fd=4,iovs=65527,iovs_len=65527,offset=0) +==> wasi_snapshot_preview1.fd_pread(fd=4,iovs=65527,iovs_len=1,offset=0) <== (nread=,errno=EFAULT) +`, + }, + { + name: "offset negative", + fd: fd, + iovs: 1, iovsCount: 1, + resultNread: 10, + memory: []byte{ + '?', // `iovs` is after this + 9, 0, 0, 0, // = iovs[0].offset + 1, 0, 0, 0, // = iovs[0].length + '?', + '?', '?', '?', '?', + }, + offset: int64(-1), + expectedErrno: ErrnoIo, + expectedLog: ` +==> wasi_snapshot_preview1.fd_pread(fd=4,iovs=65523,iovs_len=1,offset=-1) +<== (nread=,errno=EIO) `, }, } @@ -703,7 +712,7 @@ func Test_fdPread_Errors(t *testing.T) { memoryWriteOK := mod.Memory().Write(offset, tc.memory) require.True(t, memoryWriteOK) - requireErrno(t, tc.expectedErrno, mod, FdPreadName, uint64(tc.fd), uint64(tc.iovs+offset), uint64(tc.iovsCount+offset), uint64(tc.offset), uint64(tc.resultNread+offset)) + requireErrno(t, tc.expectedErrno, mod, FdPreadName, uint64(tc.fd), uint64(tc.iovs+offset), uint64(tc.iovsCount), uint64(tc.offset), uint64(tc.resultNread+offset)) require.Equal(t, tc.expectedLog, "\n"+log.String()) }) } diff --git a/internal/gojs/fs.go b/internal/gojs/fs.go index 7d5a8f0d65..64e35e5f79 100644 --- a/internal/gojs/fs.go +++ b/internal/gojs/fs.go @@ -274,17 +274,13 @@ func syscallRead(mod api.Module, fd uint32, offset interface{}, p []byte) (n uin err = syscall.EBADF } + var reader io.Reader = f.File + if offset != nil { - if s, ok := f.File.(io.Seeker); ok { - if _, err := s.Seek(toInt64(offset), io.SeekStart); err != nil { - return 0, err - } - } else { - return 0, syscall.ENOTSUP - } + reader = syscallfs.ReaderAtOffset(f.File, toInt64(offset)) } - if nRead, e := f.File.Read(p); e == nil || e == io.EOF { + if nRead, e := reader.Read(p); e == nil || e == io.EOF { // fs_js.go cannot parse io.EOF so coerce it to nil. // See https://github.com/golang/go/issues/43913 n = uint32(nRead) @@ -309,7 +305,7 @@ func (jsfsWrite) invoke(ctx context.Context, mod api.Module, args ...interface{} } offset := goos.ValueToUint32(args[2]) byteCount := goos.ValueToUint32(args[3]) - fOffset := args[4] // nil unless Pread + fOffset := args[4] // nil unless Pwrite callback := args[5].(funcWrapper) if byteCount > 0 { // empty is possible on EOF diff --git a/internal/gojs/testdata/fs/main.go b/internal/gojs/testdata/fs/main.go index 633338c32f..9c07f3dc97 100644 --- a/internal/gojs/testdata/fs/main.go +++ b/internal/gojs/testdata/fs/main.go @@ -1,7 +1,9 @@ package fs import ( + "bytes" "fmt" + "io" "log" "os" "syscall" @@ -34,12 +36,35 @@ func testAdHoc() { } } + // Read the full contents of the file using io.Reader b, err := os.ReadFile("/animals.txt") if err != nil { log.Panicln(err) } fmt.Println("contents:", string(b)) + // Re-open the same file to test io.ReaderAt + f, err := os.Open("/animals.txt") + if err != nil { + log.Panicln(err) + } + defer f.Close() + + // Seek to an arbitrary position. + if _, err = f.Seek(4, io.SeekStart); err != nil { + log.Panicln(err) + } + + b1 := make([]byte, len(b)) + // We expect to revert to the original position on ReadAt zero. + if _, err = f.ReadAt(b1, 0); err != nil { + log.Panicln(err) + } + + if !bytes.Equal(b, b1) { + log.Panicln("unexpected ReadAt contents: ", string(b1)) + } + b, err = os.ReadFile("/empty.txt") if err != nil { log.Panicln(err) diff --git a/internal/syscallfs/adapter_test.go b/internal/syscallfs/adapter_test.go index 1d489a4fb2..1c2d8e8da0 100644 --- a/internal/syscallfs/adapter_test.go +++ b/internal/syscallfs/adapter_test.go @@ -125,16 +125,36 @@ func TestAdapt_TestFS(t *testing.T) { // Make a new fs.FS, noting the Go 1.17 fstest doesn't automatically filter // "." entries in a directory. TODO: remove when we remove 1.17 - fsFS := make(gofstest.MapFS, len(fstest.FS)-1) + mapFS := make(gofstest.MapFS, len(fstest.FS)-1) for k, v := range fstest.FS { if k != "." { - fsFS[k] = v + mapFS[k] = v } } - // Adapt a normal fs.FS to syscallfs.FS - testFS := Adapt("/", fsFS) + tmpDir := t.TempDir() + require.NoError(t, fstest.WriteTestFiles(tmpDir)) + dirFS := os.DirFS(tmpDir) + + // TODO: We can't currently test embed.FS here because the source of + // fstest.FS are not real files. + tests := []struct { + name string + fs fs.FS + }{ + {name: "os.DirFS", fs: dirFS}, + {name: "fstest.MapFS", fs: mapFS}, + } + + for _, tc := range tests { + tc := tc - // Adapt it back to fs.FS and run the tests - require.NoError(t, fstest.TestFS(&testFSAdapter{testFS})) + t.Run(tc.name, func(t *testing.T) { + // Adapt a normal fs.FS to syscallfs.FS + testFS := Adapt("/", tc.fs) + + // Adapt it back to fs.FS and run the tests + require.NoError(t, fstest.TestFS(&testFSAdapter{testFS})) + }) + } } diff --git a/internal/syscallfs/syscallfs.go b/internal/syscallfs/syscallfs.go index 195ff6976a..6c6c9a5b8a 100644 --- a/internal/syscallfs/syscallfs.go +++ b/internal/syscallfs/syscallfs.go @@ -4,6 +4,7 @@ import ( "io" "io/fs" "os" + "syscall" ) // FS is a writeable fs.FS bridge backed by syscall functions needed for ABI @@ -134,8 +135,8 @@ func StatPath(fs FS, path string) (fs.FileInfo, error) { // readFile declares all read interfaces defined on os.File used by wazero. type readFile interface { fs.ReadDirFile - io.ReaderAt - io.Seeker + io.ReaderAt // for pread + io.Seeker // fallback for ReaderAt for embed:fs } // file declares all interfaces defined on os.File used by wazero. @@ -146,3 +147,81 @@ type file interface { } type syncer interface{ Sync() error } + +// ReaderAtOffset gets an io.Reader from a fs.File that reads from an offset, +// yet doesn't affect the underlying position. This is used to implement +// syscall.Pread. +// +// Note: The file accessed shouldn't be used concurrently, but wasm isn't safe +// to use concurrently anyway. Hence, we don't do any locking against parallel +// reads. +func ReaderAtOffset(f fs.File, offset int64) io.Reader { + if ret, ok := f.(io.ReaderAt); ok { + return &readerAtOffset{ret, offset} + } else if ret, ok := f.(io.ReadSeeker); ok { + return &seekToOffsetReader{ret, offset} + } else { + return enosysReader{} + } +} + +type enosysReader struct{} + +// enosysReader implements io.Reader +func (rs enosysReader) Read([]byte) (n int, err error) { + return 0, syscall.ENOSYS +} + +type readerAtOffset struct { + r io.ReaderAt + offset int64 +} + +// Read implements io.Reader +func (r *readerAtOffset) Read(p []byte) (int, error) { + if len(p) == 0 { + return 0, nil // less overhead on zero-length reads. + } + + n, err := r.r.ReadAt(p, r.offset) + r.offset += int64(n) + return n, err +} + +// seekToOffsetReader implements io.Reader that seeks to an offset and reverts +// to its initial offset after each call to Read. +// +// See /RATIONALE.md "fd_pread: io.Seeker fallback when io.ReaderAt is not supported" +type seekToOffsetReader struct { + s io.ReadSeeker + offset int64 +} + +// Read implements io.Reader +func (rs *seekToOffsetReader) Read(p []byte) (int, error) { + if len(p) == 0 { + return 0, nil // less overhead on zero-length reads. + } + + // Determine the current position in the file, as we need to revert it. + currentOffset, err := rs.s.Seek(0, io.SeekCurrent) + if err != nil { + return 0, err + } + + // Put the read position back when complete. + defer func() { _, _ = rs.s.Seek(currentOffset, io.SeekStart) }() + + // If the current offset isn't in sync with this reader, move it. + if rs.offset != currentOffset { + _, err := rs.s.Seek(rs.offset, io.SeekStart) + if err != nil { + return 0, err + } + } + + // Perform the read, updating the offset. + n, err := rs.s.Read(p) + rs.offset += int64(n) + return n, err +} diff --git a/internal/syscallfs/syscallfs_bench_test.go b/internal/syscallfs/syscallfs_bench_test.go new file mode 100644 index 0000000000..6f1a0cb894 --- /dev/null +++ b/internal/syscallfs/syscallfs_bench_test.go @@ -0,0 +1,63 @@ +package syscallfs + +import ( + "io" + "io/fs" + "os" + "testing" +) + +func BenchmarkReaderAtOffset(b *testing.B) { + dirFS := os.DirFS("testdata") + embedFS, err := fs.Sub(readerAtFS, "testdata") + if err != nil { + b.Fatal(err) + } + + benches := []struct { + name string + fs fs.FS + ra bool + }{ + {name: "os.DirFS io.File", fs: dirFS, ra: false}, + {name: "os.DirFS readerAtOffset", fs: dirFS, ra: true}, + {name: "embed.FS embed.file", fs: embedFS, ra: false}, + {name: "embed.FS seekToOffsetReader", fs: embedFS, ra: true}, + } + + buf := make([]byte, 3) + + for _, bc := range benches { + bc := bc + + b.Run(bc.name, func(b *testing.B) { + f, err := bc.fs.Open("wazero.txt") + if err != nil { + b.Fatal(err) + } + defer f.Close() + + b.ResetTimer() + for i := 0; i < b.N; i++ { + b.StopTimer() + + // Reset the read position back to the beginning of the file. + if _, err = f.(io.Seeker).Seek(0, io.SeekStart); err != nil { + b.Fatal(err) + } + + // Get the reader we are benchmarking + var r io.Reader = f + if bc.ra { + r = ReaderAtOffset(f, 3) + } + + b.StartTimer() + if _, err := r.Read(buf); err != nil { + b.Fatal(err) + } + b.StopTimer() + } + }) + } +} diff --git a/internal/syscallfs/syscallfs_test.go b/internal/syscallfs/syscallfs_test.go index af25e5c37a..91290a8c2d 100644 --- a/internal/syscallfs/syscallfs_test.go +++ b/internal/syscallfs/syscallfs_test.go @@ -1,6 +1,8 @@ package syscallfs import ( + "embed" + _ "embed" "errors" "io" "io/fs" @@ -11,6 +13,7 @@ import ( "strings" "syscall" "testing" + "testing/fstest" "time" "github.com/tetratelabs/wazero/internal/platform" @@ -257,3 +260,154 @@ func (f *testFSAdapter) Open(name string) (fs.File, error) { func requireErrno(t *testing.T, expected syscall.Errno, actual error) { require.True(t, errors.Is(actual, expected), "expected %v, but was %v", expected, actual) } + +var ( + //go:embed testdata/*.txt + readerAtFS embed.FS + readerAtFile = "wazero.txt" + emptyFile = "empty.txt" +) + +func TestReaderAtOffset(t *testing.T) { + embedFS, err := fs.Sub(readerAtFS, "testdata") + require.NoError(t, err) + + d, err := embedFS.Open(readerAtFile) + require.NoError(t, err) + defer d.Close() + + bytes, err := io.ReadAll(d) + require.NoError(t, err) + + mapFS := fstest.MapFS{readerAtFile: &fstest.MapFile{Data: bytes}} + + // Write a file as can't open "testdata" in scratch tests because they + // can't read the original filesystem. + tmpDir := t.TempDir() + require.NoError(t, os.WriteFile(path.Join(tmpDir, readerAtFile), bytes, 0o600)) + dirFS := os.DirFS(tmpDir) + + tests := []struct { + name string + fs fs.FS + }{ + {name: "os.DirFS", fs: dirFS}, + {name: "embed.FS", fs: embedFS}, + {name: "fstest.MapFS", fs: mapFS}, + } + + buf := make([]byte, 3) + + for _, tc := range tests { + tc := tc + + t.Run(tc.name, func(t *testing.T) { + f, err := tc.fs.Open(readerAtFile) + require.NoError(t, err) + defer f.Close() + + var r io.Reader = f + ra := ReaderAtOffset(f, 0) + + requireRead3 := func(r io.Reader, buf []byte) { + n, err := r.Read(buf) + require.NoError(t, err) + require.Equal(t, 3, n) + } + + // The file should work as a reader (base case) + requireRead3(r, buf) + require.Equal(t, "waz", string(buf)) + buf = buf[:] + + // The readerAt impl should be able to start from zero also + requireRead3(ra, buf) + require.Equal(t, "waz", string(buf)) + buf = buf[:] + + // If the offset didn't change, we expect the next three chars. + requireRead3(r, buf) + require.Equal(t, "ero", string(buf)) + buf = buf[:] + + // If state was held between reader-at, we expect the same + requireRead3(ra, buf) + require.Equal(t, "ero", string(buf)) + buf = buf[:] + + // We should also be able to make another reader-at + ra = ReaderAtOffset(f, 3) + requireRead3(ra, buf) + require.Equal(t, "ero", string(buf)) + }) + } +} + +func TestReaderAtOffset_empty(t *testing.T) { + embedFS, err := fs.Sub(readerAtFS, "testdata") + require.NoError(t, err) + + d, err := embedFS.Open(readerAtFile) + require.NoError(t, err) + defer d.Close() + + mapFS := fstest.MapFS{emptyFile: &fstest.MapFile{}} + + // Write a file as can't open "testdata" in scratch tests because they + // can't read the original filesystem. + tmpDir := t.TempDir() + require.NoError(t, os.WriteFile(path.Join(tmpDir, emptyFile), []byte{}, 0o600)) + dirFS := os.DirFS(tmpDir) + + tests := []struct { + name string + fs fs.FS + }{ + {name: "os.DirFS", fs: dirFS}, + {name: "embed.FS", fs: embedFS}, + {name: "fstest.MapFS", fs: mapFS}, + } + + buf := make([]byte, 3) + + for _, tc := range tests { + tc := tc + + t.Run(tc.name, func(t *testing.T) { + f, err := tc.fs.Open(emptyFile) + require.NoError(t, err) + defer f.Close() + + var r io.Reader = f + ra := ReaderAtOffset(f, 0) + + requireRead3 := func(r io.Reader, buf []byte) { + n, err := r.Read(buf) + require.Equal(t, err, io.EOF) + require.Equal(t, 0, n) // file is empty + } + + // The file should work as a reader (base case) + requireRead3(r, buf) + + // The readerAt impl should be able to start from zero also + requireRead3(ra, buf) + }) + } +} + +func TestReaderAtOffset_Unsupported(t *testing.T) { + embedFS, err := fs.Sub(readerAtFS, "testdata") + require.NoError(t, err) + + f, err := embedFS.Open(emptyFile) + require.NoError(t, err) + defer f.Close() + + // mask both io.ReaderAt and io.Seeker + ra := ReaderAtOffset(struct{ fs.File }{f}, 0) + + buf := make([]byte, 3) + _, err = ra.Read(buf) + require.Equal(t, syscall.ENOSYS, err) +} diff --git a/internal/syscallfs/testdata/empty.txt b/internal/syscallfs/testdata/empty.txt new file mode 100644 index 0000000000..e69de29bb2 diff --git a/internal/syscallfs/testdata/wazero.txt b/internal/syscallfs/testdata/wazero.txt new file mode 100644 index 0000000000..3058e80561 --- /dev/null +++ b/internal/syscallfs/testdata/wazero.txt @@ -0,0 +1 @@ +wazero