Skip to content

Commit

Permalink
Cache directory entries in the overlay
Browse files Browse the repository at this point in the history
Currently, the overlay dirCache is only used for a single logical use of
getdents. i.e., it is discard when the FD is closed or seeked back to
the beginning.

But the initial work of getting the directory contents can be quite
expensive (particularly sorting large directories), so we should keep it
as long as possible.

This is very similar to the readdirCache in fs/gofer.

Since the upper filesystem does not have to allow caching readdir
entries, the new CacheReaddir MountSourceOperations method controls this
behavior.

This caching should be trivially movable to all Inodes if desired,
though that adds an additional copy step for non-overlay Inodes.
(Overlay Inodes already do the extra copy).

PiperOrigin-RevId: 255477592
  • Loading branch information
prattmic authored and gvisor-bot committed Jun 27, 2019
1 parent e276083 commit 085a907
Show file tree
Hide file tree
Showing 17 changed files with 174 additions and 67 deletions.
1 change: 1 addition & 0 deletions pkg/sentry/fs/BUILD
Original file line number Diff line number Diff line change
Expand Up @@ -69,6 +69,7 @@ go_library(
"//pkg/state",
"//pkg/syserror",
"//pkg/waiter",
"//third_party/gvsync",
],
)

Expand Down
4 changes: 2 additions & 2 deletions pkg/sentry/fs/dirent.go
Original file line number Diff line number Diff line change
Expand Up @@ -918,7 +918,7 @@ type DirIterator interface {
// calls, and must start with the given offset.
//
// The caller must ensure that this operation is permitted.
IterateDir(ctx context.Context, dirCtx *DirCtx, offset int) (int, error)
IterateDir(ctx context.Context, d *Dirent, dirCtx *DirCtx, offset int) (int, error)
}

// DirentReaddir serializes the directory entries of d including "." and "..".
Expand Down Expand Up @@ -988,7 +988,7 @@ func direntReaddir(ctx context.Context, d *Dirent, it DirIterator, root *Dirent,
// it.IterateDir should be passed an offset that does not include the
// initial dot elements. We will add them back later.
offset -= 2
newOffset, err := it.IterateDir(ctx, dirCtx, int(offset))
newOffset, err := it.IterateDir(ctx, d, dirCtx, int(offset))
if int64(newOffset) < offset {
panic(fmt.Sprintf("node.Readdir returned offset %v less than input offset %v", newOffset, offset))
}
Expand Down
85 changes: 47 additions & 38 deletions pkg/sentry/fs/file_overlay.go
Original file line number Diff line number Diff line change
Expand Up @@ -85,12 +85,6 @@ type overlayFileOperations struct {
// protected by File.mu of the owning file, which is held during
// Readdir and Seek calls.
dirCursor string

// dirCacheMu protects dirCache.
dirCacheMu sync.RWMutex `state:"nosave"`

// dirCache is cache of DentAttrs from upper and lower Inodes.
dirCache *SortedDentryMap
}

// Release implements FileOperations.Release.
Expand Down Expand Up @@ -171,53 +165,68 @@ func (f *overlayFileOperations) Readdir(ctx context.Context, file *File, seriali
if root != nil {
defer root.DecRef()
}

dirCtx := &DirCtx{
Serializer: serializer,
DirCursor: &f.dirCursor,
}
return DirentReaddir(ctx, file.Dirent, f, root, dirCtx, file.Offset())
}

// If the directory dirent is frozen, then DirentReaddir will calculate
// the children based off the frozen dirent tree. There is no need to
// call readdir on the upper/lower layers.
if file.Dirent.frozen {
return DirentReaddir(ctx, file.Dirent, f, root, dirCtx, file.Offset())
// IterateDir implements DirIterator.IterateDir.
func (f *overlayFileOperations) IterateDir(ctx context.Context, d *Dirent, dirCtx *DirCtx, offset int) (int, error) {
o := d.Inode.overlay

if !d.Inode.MountSource.CacheReaddir() {
// Can't use the dirCache. Simply read the entries.
entries, err := readdirEntries(ctx, o)
if err != nil {
return offset, err
}
n, err := GenericReaddir(dirCtx, entries)
return offset + n, err
}

// Otherwise proceed with usual overlay readdir.
o := file.Dirent.Inode.overlay
// Otherwise, use or create cached entries.

o.dirCacheMu.RLock()
if o.dirCache != nil {
n, err := GenericReaddir(dirCtx, o.dirCache)
o.dirCacheMu.RUnlock()
return offset + n, err
}
o.dirCacheMu.RUnlock()

// readdirEntries holds o.copyUpMu to ensure that copy-up does not
// occur while calculating the readir results.
// occur while calculating the readdir results.
//
// However, it is possible for a copy-up to occur after the call to
// readdirEntries, but before setting f.dirCache. This is OK, since
// copy-up only does not change the children in a way that would affect
// the children returned in dirCache. Copy-up only moves
// files/directories between layers in the overlay.
// readdirEntries, but before setting o.dirCache. This is OK, since
// copy-up does not change the children in a way that would affect the
// children returned in dirCache. Copy-up only moves files/directories
// between layers in the overlay.
//
// It is also possible for Readdir to race with a Create operation
// (which may trigger a copy-up during it's execution). Depending on
// whether the Create happens before or after the readdirEntries call,
// the newly created file may or may not appear in the readdir results.
// But this can only be caused by a real race between readdir and
// create syscalls, so it's also OK.
dirCache, err := readdirEntries(ctx, o)
if err != nil {
return file.Offset(), err
// We must hold dirCacheMu around both readdirEntries and setting
// o.dirCache to synchronize with dirCache invalidations done by
// Create, Remove, Rename.
o.dirCacheMu.Lock()

// We expect dirCache to be nil (we just checked above), but there is a
// chance that a racing call managed to just set it, in which case we
// can use that new value.
if o.dirCache == nil {
dirCache, err := readdirEntries(ctx, o)
if err != nil {
o.dirCacheMu.Unlock()
return offset, err
}
o.dirCache = dirCache
}

f.dirCacheMu.Lock()
f.dirCache = dirCache
f.dirCacheMu.Unlock()
o.dirCacheMu.DowngradeLock()
n, err := GenericReaddir(dirCtx, o.dirCache)
o.dirCacheMu.RUnlock()

return DirentReaddir(ctx, file.Dirent, f, root, dirCtx, file.Offset())
}

// IterateDir implements DirIterator.IterateDir.
func (f *overlayFileOperations) IterateDir(ctx context.Context, dirCtx *DirCtx, offset int) (int, error) {
f.dirCacheMu.RLock()
n, err := GenericReaddir(dirCtx, f.dirCache)
f.dirCacheMu.RUnlock()
return offset + n, err
}

Expand Down
2 changes: 1 addition & 1 deletion pkg/sentry/fs/fsutil/file.go
Original file line number Diff line number Diff line change
Expand Up @@ -285,7 +285,7 @@ func NewStaticDirFileOperations(dentries *fs.SortedDentryMap) *StaticDirFileOper
}

// IterateDir implements DirIterator.IterateDir.
func (sdfo *StaticDirFileOperations) IterateDir(ctx context.Context, dirCtx *fs.DirCtx, offset int) (int, error) {
func (sdfo *StaticDirFileOperations) IterateDir(ctx context.Context, d *fs.Dirent, dirCtx *fs.DirCtx, offset int) (int, error) {
n, err := fs.GenericReaddir(dirCtx, sdfo.dentryMap)
return offset + n, err
}
Expand Down
2 changes: 1 addition & 1 deletion pkg/sentry/fs/gofer/file.go
Original file line number Diff line number Diff line change
Expand Up @@ -137,7 +137,7 @@ func (f *fileOperations) Readdir(ctx context.Context, file *fs.File, serializer
}

// IterateDir implements fs.DirIterator.IterateDir.
func (f *fileOperations) IterateDir(ctx context.Context, dirCtx *fs.DirCtx, offset int) (int, error) {
func (f *fileOperations) IterateDir(ctx context.Context, d *fs.Dirent, dirCtx *fs.DirCtx, offset int) (int, error) {
f.inodeOperations.readdirMu.Lock()
defer f.inodeOperations.readdirMu.Unlock()

Expand Down
9 changes: 7 additions & 2 deletions pkg/sentry/fs/gofer/session.go
Original file line number Diff line number Diff line change
Expand Up @@ -145,16 +145,21 @@ func (s *session) Destroy() {
s.client.Close()
}

// Revalidate implements MountSource.Revalidate.
// Revalidate implements MountSourceOperations.Revalidate.
func (s *session) Revalidate(ctx context.Context, name string, parent, child *fs.Inode) bool {
return s.cachePolicy.revalidate(ctx, name, parent, child)
}

// Keep implements MountSource.Keep.
// Keep implements MountSourceOperations.Keep.
func (s *session) Keep(d *fs.Dirent) bool {
return s.cachePolicy.keep(d)
}

// CacheReaddir implements MountSourceOperations.CacheReaddir.
func (s *session) CacheReaddir() bool {
return s.cachePolicy.cacheReaddir()
}

// ResetInodeMappings implements fs.MountSourceOperations.ResetInodeMappings.
func (s *session) ResetInodeMappings() {
s.inodeMappings = make(map[uint64]string)
Expand Down
2 changes: 1 addition & 1 deletion pkg/sentry/fs/host/file.go
Original file line number Diff line number Diff line change
Expand Up @@ -179,7 +179,7 @@ func (f *fileOperations) Readdir(ctx context.Context, file *fs.File, serializer
}

// IterateDir implements fs.DirIterator.IterateDir.
func (f *fileOperations) IterateDir(ctx context.Context, dirCtx *fs.DirCtx, offset int) (int, error) {
func (f *fileOperations) IterateDir(ctx context.Context, d *fs.Dirent, dirCtx *fs.DirCtx, offset int) (int, error) {
if f.dirinfo == nil {
f.dirinfo = new(dirInfo)
f.dirinfo.buf = make([]byte, usermem.PageSize)
Expand Down
4 changes: 2 additions & 2 deletions pkg/sentry/fs/host/inode_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -64,12 +64,12 @@ func TestMultipleReaddir(t *testing.T) {
defer openFile.DecRef()

c1 := &fs.DirCtx{DirCursor: new(string)}
if _, err := openFile.FileOperations.(*fileOperations).IterateDir(ctx, c1, 0); err != nil {
if _, err := openFile.FileOperations.(*fileOperations).IterateDir(ctx, dirent, c1, 0); err != nil {
t.Fatalf("First Readdir failed: %v", err)
}

c2 := &fs.DirCtx{DirCursor: new(string)}
if _, err := openFile.FileOperations.(*fileOperations).IterateDir(ctx, c2, 0); err != nil {
if _, err := openFile.FileOperations.(*fileOperations).IterateDir(ctx, dirent, c2, 0); err != nil {
t.Errorf("Second Readdir failed: %v", err)
}

Expand Down
38 changes: 34 additions & 4 deletions pkg/sentry/fs/inode_overlay.go
Original file line number Diff line number Diff line change
Expand Up @@ -217,6 +217,9 @@ func overlayCreate(ctx context.Context, o *overlayEntry, parent *Dirent, name st
return nil, err
}

// We've added to the directory so we must drop the cache.
o.markDirectoryDirty()

// Take another reference on the upper file's inode, which will be
// owned by the overlay entry.
upperFile.Dirent.Inode.IncRef()
Expand Down Expand Up @@ -265,15 +268,25 @@ func overlayCreateDirectory(ctx context.Context, o *overlayEntry, parent *Dirent
if err := copyUpLockedForRename(ctx, parent); err != nil {
return err
}
return o.upper.InodeOperations.CreateDirectory(ctx, o.upper, name, perm)
if err := o.upper.InodeOperations.CreateDirectory(ctx, o.upper, name, perm); err != nil {
return err
}
// We've added to the directory so we must drop the cache.
o.markDirectoryDirty()
return nil
}

func overlayCreateLink(ctx context.Context, o *overlayEntry, parent *Dirent, oldname string, newname string) error {
// Dirent.CreateLink takes renameMu if the Inode is an overlay Inode.
if err := copyUpLockedForRename(ctx, parent); err != nil {
return err
}
return o.upper.InodeOperations.CreateLink(ctx, o.upper, oldname, newname)
if err := o.upper.InodeOperations.CreateLink(ctx, o.upper, oldname, newname); err != nil {
return err
}
// We've added to the directory so we must drop the cache.
o.markDirectoryDirty()
return nil
}

func overlayCreateHardLink(ctx context.Context, o *overlayEntry, parent *Dirent, target *Dirent, name string) error {
Expand All @@ -285,15 +298,25 @@ func overlayCreateHardLink(ctx context.Context, o *overlayEntry, parent *Dirent,
if err := copyUpLockedForRename(ctx, target); err != nil {
return err
}
return o.upper.InodeOperations.CreateHardLink(ctx, o.upper, target.Inode.overlay.upper, name)
if err := o.upper.InodeOperations.CreateHardLink(ctx, o.upper, target.Inode.overlay.upper, name); err != nil {
return err
}
// We've added to the directory so we must drop the cache.
o.markDirectoryDirty()
return nil
}

func overlayCreateFifo(ctx context.Context, o *overlayEntry, parent *Dirent, name string, perm FilePermissions) error {
// Dirent.CreateFifo takes renameMu if the Inode is an overlay Inode.
if err := copyUpLockedForRename(ctx, parent); err != nil {
return err
}
return o.upper.InodeOperations.CreateFifo(ctx, o.upper, name, perm)
if err := o.upper.InodeOperations.CreateFifo(ctx, o.upper, name, perm); err != nil {
return err
}
// We've added to the directory so we must drop the cache.
o.markDirectoryDirty()
return nil
}

func overlayRemove(ctx context.Context, o *overlayEntry, parent *Dirent, child *Dirent) error {
Expand All @@ -318,6 +341,8 @@ func overlayRemove(ctx context.Context, o *overlayEntry, parent *Dirent, child *
if child.Inode.overlay.lowerExists {
return overlayCreateWhiteout(o.upper, child.name)
}
// We've removed from the directory so we must drop the cache.
o.markDirectoryDirty()
return nil
}

Expand Down Expand Up @@ -395,6 +420,8 @@ func overlayRename(ctx context.Context, o *overlayEntry, oldParent *Dirent, rena
if renamed.Inode.overlay.lowerExists {
return overlayCreateWhiteout(oldParent.Inode.overlay.upper, oldName)
}
// We've changed the directory so we must drop the cache.
o.markDirectoryDirty()
return nil
}

Expand All @@ -411,6 +438,9 @@ func overlayBind(ctx context.Context, o *overlayEntry, parent *Dirent, name stri
return nil, err
}

// We've added to the directory so we must drop the cache.
o.markDirectoryDirty()

// Grab the inode and drop the dirent, we don't need it.
inode := d.Inode
inode.IncRef()
Expand Down
6 changes: 6 additions & 0 deletions pkg/sentry/fs/mock.go
Original file line number Diff line number Diff line change
Expand Up @@ -75,6 +75,12 @@ func (n *MockMountSourceOps) Keep(dirent *Dirent) bool {
return n.keep
}

// CacheReaddir implements fs.MountSourceOperations.CacheReaddir.
func (n *MockMountSourceOps) CacheReaddir() bool {
// Common case: cache readdir results if there is a dirent cache.
return n.keep
}

// WriteOut implements fs.InodeOperations.WriteOut.
func (n *MockInodeOperations) WriteOut(context.Context, *Inode) error {
return nil
Expand Down
Loading

0 comments on commit 085a907

Please sign in to comment.