From 245b84c887955d12cd1113e9a6701ee7338c8255 Mon Sep 17 00:00:00 2001 From: Jakob Unterwurzacher Date: Mon, 22 May 2017 22:26:59 +0200 Subject: [PATCH] nametransform: diriv cache: fall back to the grandparent When a user calls into a deep directory hierarchy, we often get a sequence like this from the kernel: LOOKUP a LOOKUP a/b LOOKUP a/b/c LOOKUP a/b/c/d The diriv cache was not effective for this pattern, because it was designed for this: LOOKUP a/a LOOKUP a/b LOOKUP a/c LOOKUP a/d By also using the cached entry of the grandparent we can avoid lots of diriv reads. This benchmark is against a large encrypted directory hosted on NFS: Before: $ time ls -R nfs-backed-mount > /dev/null real 1m35.976s user 0m0.248s sys 0m0.281s After: $ time ls -R nfs-backed-mount > /dev/null real 1m3.670s user 0m0.217s sys 0m0.403s --- internal/nametransform/diriv.go | 32 +++++++++++++++++++++++++------- 1 file changed, 25 insertions(+), 7 deletions(-) diff --git a/internal/nametransform/diriv.go b/internal/nametransform/diriv.go index 00d059f..e74592a 100644 --- a/internal/nametransform/diriv.go +++ b/internal/nametransform/diriv.go @@ -90,7 +90,8 @@ func WriteDirIV(dir string) error { return nil } -// EncryptPathDirIV - encrypt relative plaintext path using EME with DirIV. +// EncryptPathDirIV - encrypt relative plaintext path "plainPath" using EME with +// DirIV. "rootDir" is the backing storage root directory. // Components that are longer than 255 bytes are hashed if be.longnames == true. func (be *NameTransform) EncryptPathDirIV(plainPath string, rootDir string) (cipherPath string, err error) { // Empty string means root directory @@ -103,7 +104,8 @@ func (be *NameTransform) EncryptPathDirIV(plainPath string, rootDir string) (cip if len(baseName) > syscall.NAME_MAX { return "", syscall.ENAMETOOLONG } - // Check if the DirIV is cached + // Check if the DirIV is cached. This catches the case of the user iterating + // over files in a directory pretty well. 
parentDir := filepath.Dir(plainPath) iv, cParentDir := be.DirIVCache.lookup(parentDir) if iv != nil { @@ -114,10 +116,27 @@ func (be *NameTransform) EncryptPathDirIV(plainPath string, rootDir string) (cip cipherPath = filepath.Join(cParentDir, cBaseName) return cipherPath, nil } - // Not cached - walk the directory tree - var wd = rootDir - var encryptedNames []string + // We have to walk the directory tree, in the worst case starting at the root + // directory. + wd := rootDir plainNames := strings.Split(plainPath, "/") + // So the DirIV we need is not cached. But maybe one level higher is + // cached. Then we can skip a few items in the directory walk. + // This catches the case of walking directories recursively. + parentDir2 := filepath.Dir(parentDir) + iv, cParentDir = be.DirIVCache.lookup(parentDir2) + if iv != nil { + parentDirBase := filepath.Base(parentDir) + cBaseName := be.EncryptName(parentDirBase, iv) + if be.longNames && len(cBaseName) > syscall.NAME_MAX { + cBaseName = be.HashLongName(cBaseName) + } + wd = filepath.Join(wd, cParentDir, cBaseName) + cipherPath = filepath.Join(cParentDir, cBaseName) + skip := len(strings.Split(cipherPath, "/")) + plainNames = plainNames[skip:] + } + // Walk the directory tree starting at "wd" for _, plainName := range plainNames { iv, err = ReadDirIV(wd) if err != nil { @@ -127,10 +146,9 @@ func (be *NameTransform) EncryptPathDirIV(plainPath string, rootDir string) (cip if be.longNames && len(encryptedName) > syscall.NAME_MAX { encryptedName = be.HashLongName(encryptedName) } - encryptedNames = append(encryptedNames, encryptedName) + cipherPath = filepath.Join(cipherPath, encryptedName) wd = filepath.Join(wd, encryptedName) } - cipherPath = filepath.Join(encryptedNames...) // Cache the final DirIV cParentDir = filepath.Dir(cipherPath) be.DirIVCache.store(parentDir, iv, cParentDir)