nametransform: diriv cache: fall back to the grandparent

When a user descends into a deep directory hierarchy, we often
get a sequence like this from the kernel:

LOOKUP a
LOOKUP a/b
LOOKUP a/b/c
LOOKUP a/b/c/d

The DirIV cache was not effective for this pattern, because it
was designed for iterating over the entries of a single directory:

LOOKUP a/a
LOOKUP a/b
LOOKUP a/c
LOOKUP a/d

By also using the cached entry of the grandparent we can avoid lots
of diriv reads.

This benchmark is against a large encrypted directory hosted on NFS:

Before:

  $ time ls -R nfs-backed-mount > /dev/null
  real	1m35.976s
  user	0m0.248s
  sys	0m0.281s

After:

  $ time ls -R nfs-backed-mount > /dev/null
  real	1m3.670s
  user	0m0.217s
  sys	0m0.403s
This commit is contained in:
Jakob Unterwurzacher 2017-05-22 22:26:59 +02:00
parent 5672e41f5a
commit 245b84c887
1 changed file with 25 additions and 7 deletions

View File

@@ -90,7 +90,8 @@ func WriteDirIV(dir string) error {
return nil
}
// EncryptPathDirIV - encrypt relative plaintext path using EME with DirIV.
// EncryptPathDirIV - encrypt relative plaintext path "plainPath" using EME with
// DirIV. "rootDir" is the backing storage root directory.
// Components that are longer than 255 bytes are hashed if be.longNames == true.
func (be *NameTransform) EncryptPathDirIV(plainPath string, rootDir string) (cipherPath string, err error) {
// Empty string means root directory
@@ -103,7 +104,8 @@ func (be *NameTransform) EncryptPathDirIV(plainPath string, rootDir string) (cip
if len(baseName) > syscall.NAME_MAX {
return "", syscall.ENAMETOOLONG
}
// Check if the DirIV is cached
// Check if the DirIV is cached. This catches the case of the user iterating
// over files in a directory pretty well.
parentDir := filepath.Dir(plainPath)
iv, cParentDir := be.DirIVCache.lookup(parentDir)
if iv != nil {
@@ -114,10 +116,27 @@ func (be *NameTransform) EncryptPathDirIV(plainPath string, rootDir string) (cip
cipherPath = filepath.Join(cParentDir, cBaseName)
return cipherPath, nil
}
// Not cached - walk the directory tree
var wd = rootDir
var encryptedNames []string
// We have to walk the directory tree, in the worst case starting at the root
// directory.
wd := rootDir
plainNames := strings.Split(plainPath, "/")
// So the DirIV we need is not cached. But maybe one level higher is
// cached. Then we can skip a few items in the directory walk.
// This catches the case of walking directories recursively.
parentDir2 := filepath.Dir(parentDir)
iv, cParentDir = be.DirIVCache.lookup(parentDir2)
if iv != nil {
parentDirBase := filepath.Base(parentDir)
cBaseName := be.EncryptName(parentDirBase, iv)
if be.longNames && len(cBaseName) > syscall.NAME_MAX {
cBaseName = be.HashLongName(cBaseName)
}
wd = filepath.Join(wd, cParentDir, cBaseName)
cipherPath = filepath.Join(cParentDir, cBaseName)
skip := len(strings.Split(cipherPath, "/"))
plainNames = plainNames[skip:]
}
// Walk the directory tree starting at "wd"
for _, plainName := range plainNames {
iv, err = ReadDirIV(wd)
if err != nil {
@@ -127,10 +146,9 @@ func (be *NameTransform) EncryptPathDirIV(plainPath string, rootDir string) (cip
if be.longNames && len(encryptedName) > syscall.NAME_MAX {
encryptedName = be.HashLongName(encryptedName)
}
encryptedNames = append(encryptedNames, encryptedName)
cipherPath = filepath.Join(cipherPath, encryptedName)
wd = filepath.Join(wd, encryptedName)
}
cipherPath = filepath.Join(encryptedNames...)
// Cache the final DirIV
cParentDir = filepath.Dir(cipherPath)
be.DirIVCache.store(parentDir, iv, cParentDir)