v2api: implement Create

2020-06-21 13:25:12 +02:00 · 2020-06-21 13:25:12 +02:00 · f6ded09e36
commit f6ded09e36
parent 74a4accf0c
6 changed files with 920 additions and 0 deletions
--- a/internal/fusefrontend/file2.go
+++ b/internal/fusefrontend/file2.go
@ -0,0 +1,476 @@
 package fusefrontend
 // FUSE operations on file handles
 import (
 	"bytes"
 	"encoding/hex"
 	"fmt"
 	"io"
 	"log"
 	"os"
 	"sync"
 	"syscall"
 	"time"
 	"github.com/hanwen/go-fuse/v2/fuse"
 	"github.com/hanwen/go-fuse/v2/fuse/nodefs"
 	"github.com/rfjakob/gocryptfs/internal/contentenc"
 	"github.com/rfjakob/gocryptfs/internal/inomap"
 	"github.com/rfjakob/gocryptfs/internal/openfiletable"
 	"github.com/rfjakob/gocryptfs/internal/serialize_reads"
 	"github.com/rfjakob/gocryptfs/internal/stupidgcm"
 	"github.com/rfjakob/gocryptfs/internal/syscallcompat"
 	"github.com/rfjakob/gocryptfs/internal/tlog"
 )
 var _ nodefs.File = &File2{} // Verify that File2 (the type defined in this file) implements the interface.
 // File2 is the file handle type returned by Node.Create. It is based on
 // loopbackFile in go-fuse/fuse/nodefs/files.go.
 type File2 struct {
 	// fd is the open backing file. All I/O in this file goes through it.
 	fd *os.File
 	// Has Release() already been called on this file? This also means that the
 	// open file table entry has been unregistered, so let's not crash trying to
 	// access it.
 	// Due to concurrency, Release can overtake other operations. These will
 	// return EBADF in that case.
 	released bool
 	// fdLock prevents the fd from being closed while we are in the middle of
 	// an operation.
 	// Every FUSE entrypoint should RLock(). The only user of Lock() is
 	// Release(), which closes the fd and sets "released" to true.
 	fdLock sync.RWMutex
 	// Content encryption helper
 	contentEnc *contentenc.ContentEnc
 	// Device and inode number uniquely identify the backing file
 	qIno inomap.QIno
 	// Entry in the open file table. Holds the cached file ID as well as the
 	// IDLock and ContentLock used by the read and write paths.
 	fileTableEntry *openfiletable.Entry
 	// Plaintext offset of the last byte written by the most recent successful
 	// Write() (off + len(data) - 1).
 	lastWrittenOffset int64
 	// Value of openfiletable.WriteOpCount() recorded after the most recent
 	// successful Write(). It is used to judge whether "lastWrittenOffset" is
 	// still guaranteed to be correct.
 	lastOpCount uint64
 	// Parent filesystem
 	rootNode *RootNode
 	// We embed a nodefs.NewDefaultFile() that returns ENOSYS for every operation we
 	// have not implemented. This prevents build breakage when the go-fuse library
 	// adds new methods to the nodefs.File interface.
 	nodefs.File
 }
 // NewFile2 wraps the already-open backing file "fd" in a File2.
 //
 // The qualified inode number is derived from "st" and registered in the open
 // file table; the content encryption helper is taken from "rn".
 func NewFile2(fd *os.File, rn *RootNode, st *syscall.Stat_t) *File2 {
 	qi := inomap.QInoFromStat(st)
 	entry := openfiletable.Register(qi)
 	f := new(File2)
 	f.fd = fd
 	f.contentEnc = rn.contentEnc
 	f.qIno = qi
 	f.fileTableEntry = entry
 	f.rootNode = rn
 	// Unimplemented operations fall through to the go-fuse default file.
 	f.File = nodefs.NewDefaultFile()
 	return f
 }
 // intFd returns the descriptor number of the backing file converted to int,
 // the form the syscall package expects.
 func (f *File2) intFd() int {
 	raw := f.fd.Fd()
 	return int(raw)
 }
 // readFileID reads the file header from offset 0 of the backing file and
 // returns the file ID stored in it.
 //
 // The read error is passed through unchanged, so an empty (or header-only)
 // file yields io.EOF. A short but non-empty read is additionally logged and
 // reported as a mitigated corruption.
 func (f *File2) readFileID() ([]byte, error) {
 	// Ask for one byte more than the header. A file that consists of nothing
 	// but a header is treated as empty, which makes file ID poisoning more
 	// difficult.
 	wantLen := contentenc.HeaderLen + 1
 	raw := make([]byte, wantLen)
 	got, err := f.fd.ReadAt(raw, 0)
 	if err != nil {
 		truncated := err == io.EOF && got != 0
 		if truncated {
 			tlog.Warn.Printf("readFileID %d: incomplete file, got %d instead of %d bytes",
 				f.qIno.Ino, got, wantLen)
 			f.rootNode.reportMitigatedCorruption(fmt.Sprint(f.qIno.Ino))
 		}
 		return nil, err
 	}
 	// Drop the probe byte again, only the header itself gets parsed.
 	hdr, err := contentenc.ParseHeader(raw[:contentenc.HeaderLen])
 	if err != nil {
 		return nil, err
 	}
 	return hdr.ID, nil
 }
 // createHeader generates a fresh random file header, writes it to offset 0
 // of the backing file and returns the file ID it contains.
 //
 // Unless preallocation is disabled (args.NoPrealloc), the header space is
 // allocated first so that the header cannot end up partially written.
 // The callers in this file (doWrite, truncateGrowFile) run with ContentLock
 // held exclusively.
 func (f *File2) createHeader() (fileID []byte, err error) {
 	hdr := contentenc.RandomHeader()
 	packed := hdr.Pack()
 	if usePrealloc := !f.rootNode.args.NoPrealloc; usePrealloc {
 		if err = syscallcompat.EnospcPrealloc(f.intFd(), 0, contentenc.HeaderLen); err != nil {
 			// Running out of space is not worth a warning, everything else is.
 			if !syscallcompat.IsENOSPC(err) {
 				tlog.Warn.Printf("ino%d: createHeader: prealloc failed: %s\n", f.qIno.Ino, err.Error())
 			}
 			return nil, err
 		}
 	}
 	// The space is there, now store the header.
 	if _, err = f.fd.WriteAt(packed, 0); err != nil {
 		return nil, err
 	}
 	return hdr.ID, nil
 }
 // doRead - read "length" plaintext bytes from plaintext offset "off" and append
 // to "dst".
 // Arguments "length" and "off" do not have to be block-aligned.
 //
 // doRead reads the corresponding ciphertext blocks from disk, decrypts them and
 // returns the requested part of the plaintext.
 //
 // Return values:
 //   - (nil, fuse.OK) when the file is empty (readFileID returned io.EOF);
 //     note that "dst" is NOT returned in this case.
 //   - (dst, fuse.OK) when the ciphertext read came back with zero bytes.
 //   - (nil, fuse.EIO) on a corrupt header or a block that fails decryption
 //     (unless ForceDecode is set and the error is stupidgcm.ErrAuth).
 //   - (nil, status of the read error) when ReadAt fails with anything but io.EOF.
 //
 // Called by Read() for normal reading,
 // by Write() and Truncate() via doWrite() for Read-Modify-Write.
 func (f *File2) doRead(dst []byte, off uint64, length uint64) ([]byte, fuse.Status) {
 	// Get the file ID, either from the open file table, or from disk.
 	// IDLock protects the cached ID; every return path below must unlock it.
 	var fileID []byte
 	f.fileTableEntry.IDLock.Lock()
 	if f.fileTableEntry.ID != nil {
 		// Use the cached value in the file table
 		fileID = f.fileTableEntry.ID
 	} else {
 		// Not cached, we have to read it from disk.
 		var err error
 		fileID, err = f.readFileID()
 		if err != nil {
 			f.fileTableEntry.IDLock.Unlock()
 			if err == io.EOF {
 				// Empty file
 				return nil, fuse.OK
 			}
 			// Dump the first (up to) 100 bytes of the file to help debugging.
 			// A failure of this second read is ignored on purpose, we are
 			// returning EIO anyway.
 			buf := make([]byte, 100)
 			n, _ := f.fd.ReadAt(buf, 0)
 			buf = buf[:n]
 			hexdump := hex.EncodeToString(buf)
 			tlog.Warn.Printf("doRead %d: corrupt header: %v\nFile hexdump (%d bytes): %s",
 				f.qIno.Ino, err, n, hexdump)
 			return nil, fuse.EIO
 		}
 		// Save into the file table
 		f.fileTableEntry.ID = fileID
 	}
 	f.fileTableEntry.IDLock.Unlock()
 	// Sanity check: both branches above must have produced a file ID.
 	if fileID == nil {
 		log.Panicf("fileID=%v", fileID)
 	}
 	// Read the backing ciphertext in one go
 	blocks := f.contentEnc.ExplodePlainRange(off, length)
 	alignedOffset, alignedLength := blocks[0].JointCiphertextRange(blocks)
 	// Number of plaintext bytes at the start of the first block that the
 	// caller did not ask for.
 	skip := blocks[0].Skip
 	tlog.Debug.Printf("doRead: off=%d len=%d -> off=%d len=%d skip=%d\n",
 		off, length, alignedOffset, alignedLength, skip)
 	// The ciphertext buffer comes from a pool and is handed back once it has
 	// been decrypted (or turned out to be empty).
 	ciphertext := f.rootNode.contentEnc.CReqPool.Get()
 	ciphertext = ciphertext[:int(alignedLength)]
 	n, err := f.fd.ReadAt(ciphertext, int64(alignedOffset))
 	// io.EOF just means a short read at the end of the file and is handled
 	// through "n" below.
 	if err != nil && err != io.EOF {
 		tlog.Warn.Printf("read: ReadAt: %s", err.Error())
 		return nil, fuse.ToStatus(err)
 	}
 	// The ReadAt came back empty. We can skip all the decryption and return early.
 	if n == 0 {
 		f.rootNode.contentEnc.CReqPool.Put(ciphertext)
 		return dst, fuse.OK
 	}
 	// Truncate ciphertext buffer down to actually read bytes
 	ciphertext = ciphertext[0:n]
 	firstBlockNo := blocks[0].BlockNo
 	tlog.Debug.Printf("ReadAt offset=%d bytes (%d blocks), want=%d, got=%d", alignedOffset, firstBlockNo, alignedLength, n)
 	// Decrypt it
 	plaintext, err := f.contentEnc.DecryptBlocks(ciphertext, firstBlockNo, fileID)
 	f.rootNode.contentEnc.CReqPool.Put(ciphertext)
 	if err != nil {
 		if f.rootNode.args.ForceDecode && err == stupidgcm.ErrAuth {
 			// We do not have the information which block was corrupt here anymore,
 			// but DecryptBlocks() has already logged it anyway.
 			tlog.Warn.Printf("doRead %d: off=%d len=%d: returning corrupt data due to forcedecode",
 				f.qIno.Ino, off, length)
 		} else {
 			// The block number is derived from the amount of plaintext that
 			// was returned alongside the error.
 			curruptBlockNo := firstBlockNo + f.contentEnc.PlainOffToBlockNo(uint64(len(plaintext)))
 			tlog.Warn.Printf("doRead %d: corrupt block #%d: %v", f.qIno.Ino, curruptBlockNo, err)
 			return nil, fuse.EIO
 		}
 	}
 	// Crop down to the relevant part
 	var out []byte
 	lenHave := len(plaintext)
 	lenWant := int(skip + length)
 	if lenHave > lenWant {
 		// More plaintext than requested: cut off both ends.
 		out = plaintext[skip:lenWant]
 	} else if lenHave > int(skip) {
 		// Short read: return whatever is there past the skipped part.
 		out = plaintext[skip:lenHave]
 	}
 	// else: out stays empty, file was smaller than the requested offset
 	// Copy into dst before the plaintext buffer goes back into its pool.
 	out = append(dst, out...)
 	f.rootNode.contentEnc.PReqPool.Put(plaintext)
 	return out, fuse.OK
 }
 // Read - FUSE call
 //
 // Fills "buf" with plaintext starting at plaintext offset "off". Requests
 // larger than fuse.MAX_KERNEL_WRITE are rejected with EMSGSIZE. The work is
 // done by doRead under the fd read lock and the shared content lock.
 func (f *File2) Read(buf []byte, off int64) (resultData fuse.ReadResult, code fuse.Status) {
 	want := len(buf)
 	if want > fuse.MAX_KERNEL_WRITE {
 		// This would crash us due to our fixed-size buffer pool
 		tlog.Warn.Printf("Read: rejecting oversized request with EMSGSIZE, len=%d", want)
 		return nil, fuse.Status(syscall.EMSGSIZE)
 	}
 	f.fdLock.RLock()
 	defer f.fdLock.RUnlock()
 	entry := f.fileTableEntry
 	entry.ContentLock.RLock()
 	defer entry.ContentLock.RUnlock()
 	tlog.Debug.Printf("ino%d: FUSE Read: offset=%d length=%d", f.qIno.Ino, off, want)
 	// Optionally funnel the read through the serialize_reads package.
 	serialize := f.rootNode.args.SerializeReads
 	if serialize {
 		serialize_reads.Wait(off, want)
 	}
 	// Reuse the caller's buffer as the destination.
 	out, status := f.doRead(buf[:0], uint64(off), uint64(want))
 	if serialize {
 		serialize_reads.Done()
 	}
 	if status != fuse.OK {
 		return nil, status
 	}
 	tlog.Debug.Printf("ino%d: Read: status %v, returning %d bytes", f.qIno.Ino, status, len(out))
 	return fuse.ReadResultData(out), status
 }
 // doWrite - encrypt "data" and write it to plaintext offset "off"
 //
 // Arguments do not have to be block-aligned, read-modify-write is
 // performed internally as necessary
 //
 // Returns the number of plaintext bytes written (len(data)) and fuse.OK on
 // success, or 0 and an error status.
 //
 // Called by Write() for normal writing,
 // and by Truncate() to rewrite the last file block.
 //
 // Empty writes do nothing and are allowed.
 // NOTE(review): blocks[0] is indexed unconditionally below; confirm that
 // ExplodePlainRange never returns an empty slice for len(data) == 0.
 func (f *File2) doWrite(data []byte, off int64) (uint32, fuse.Status) {
 	// Remembers whether this call created the header, so it can be rolled
 	// back if the preallocation for the payload fails.
 	fileWasEmpty := false
 	// Get the file ID, create a new one if it does not exist yet.
 	var fileID []byte
 	// The caller has exclusively locked ContentLock, which blocks all other
 	// readers and writers. No need to take IDLock.
 	if f.fileTableEntry.ID != nil {
 		fileID = f.fileTableEntry.ID
 	} else {
 		// If the file ID is not cached, read it from disk
 		var err error
 		fileID, err = f.readFileID()
 		// Write a new file header if the file is empty
 		if err == io.EOF {
 			fileID, err = f.createHeader()
 			fileWasEmpty = true
 		}
 		if err != nil {
 			return 0, fuse.ToStatus(err)
 		}
 		f.fileTableEntry.ID = fileID
 	}
 	// Handle payload data
 	dataBuf := bytes.NewBuffer(data)
 	blocks := f.contentEnc.ExplodePlainRange(uint64(off), uint64(len(data)))
 	toEncrypt := make([][]byte, len(blocks))
 	for i, b := range blocks {
 		// Consume this block's share of the payload.
 		blockData := dataBuf.Next(int(b.Length))
 		// Incomplete block -> Read-Modify-Write
 		if b.IsPartial() {
 			// Read
 			oldData, status := f.doRead(nil, b.BlockPlainOff(), f.contentEnc.PlainBS())
 			if status != fuse.OK {
 				tlog.Warn.Printf("ino%d fh%d: RMW read failed: %s", f.qIno.Ino, f.intFd(), status.String())
 				return 0, status
 			}
 			// Modify
 			blockData = f.contentEnc.MergeBlocks(oldData, blockData, int(b.Skip))
 			tlog.Debug.Printf("len(oldData)=%d len(blockData)=%d", len(oldData), len(blockData))
 		}
 		tlog.Debug.Printf("ino%d: Writing %d bytes to block #%d",
 			f.qIno.Ino, len(blockData), b.BlockNo)
 		// Write into the to-encrypt list
 		toEncrypt[i] = blockData
 	}
 	// Encrypt all blocks
 	ciphertext := f.contentEnc.EncryptBlocks(toEncrypt, blocks[0].BlockNo, f.fileTableEntry.ID)
 	// Preallocate so we cannot run out of space in the middle of the write.
 	// This prevents partially written (=corrupt) blocks.
 	var err error
 	cOff := int64(blocks[0].BlockCipherOff())
 	if !f.rootNode.args.NoPrealloc {
 		err = syscallcompat.EnospcPrealloc(f.intFd(), cOff, int64(len(ciphertext)))
 		if err != nil {
 			// Running out of space is not worth a warning, everything else is.
 			if !syscallcompat.IsENOSPC(err) {
 				tlog.Warn.Printf("ino%d fh%d: doWrite: prealloc failed: %v", f.qIno.Ino, f.intFd(), err)
 			}
 			if fileWasEmpty {
 				// Kill the file header again
 				f.fileTableEntry.ID = nil
 				err2 := syscall.Ftruncate(f.intFd(), 0)
 				if err2 != nil {
 					tlog.Warn.Printf("ino%d fh%d: doWrite: rollback failed: %v", f.qIno.Ino, f.intFd(), err2)
 				}
 			}
 			// Report the original prealloc error, not the rollback result.
 			return 0, fuse.ToStatus(err)
 		}
 	}
 	// Write
 	_, err = f.fd.WriteAt(ciphertext, cOff)
 	// Return memory to CReqPool
 	f.rootNode.contentEnc.CReqPool.Put(ciphertext)
 	if err != nil {
 		tlog.Warn.Printf("ino%d fh%d: doWrite: WriteAt off=%d len=%d failed: %v",
 			f.qIno.Ino, f.intFd(), cOff, len(ciphertext), err)
 		return 0, fuse.ToStatus(err)
 	}
 	return uint32(len(data)), fuse.OK
 }
 // isConsecutiveWrite reports whether a write at plaintext offset "off"
 // directly (in time and space) follows the last successful Write() on this
 // handle: the global write op count must be exactly one past the recorded
 // value and "off" must be the byte right after the last written one.
 // This is an optimisation for streaming writes on NFS where a
 // Stat() call is very expensive.
 // The only caller, Write(), holds ContentLock exclusively; without that the
 // check would be racy.
 func (f *File2) isConsecutiveWrite(off int64) bool {
 	if openfiletable.WriteOpCount() != f.lastOpCount+1 {
 		return false
 	}
 	return off == f.lastWrittenOffset+1
 }
 // Write - FUSE call
 //
 // Encrypts "data" and stores it at plaintext offset "off". Oversized requests
 // get EMSGSIZE, writes on a released handle get EBADF.
 // If the write creates a hole, pads the file to the next block boundary.
 func (f *File2) Write(data []byte, off int64) (uint32, fuse.Status) {
 	size := len(data)
 	if size > fuse.MAX_KERNEL_WRITE {
 		// This would crash us due to our fixed-size buffer pool
 		tlog.Warn.Printf("Write: rejecting oversized request with EMSGSIZE, len=%d", size)
 		return 0, fuse.Status(syscall.EMSGSIZE)
 	}
 	f.fdLock.RLock()
 	defer f.fdLock.RUnlock()
 	if f.released {
 		// The file descriptor has been closed concurrently
 		tlog.Warn.Printf("ino%d fh%d: Write on released file", f.qIno.Ino, f.intFd())
 		return 0, fuse.EBADF
 	}
 	entry := f.fileTableEntry
 	entry.ContentLock.Lock()
 	defer entry.ContentLock.Unlock()
 	tlog.Debug.Printf("ino%d: FUSE Write: offset=%d length=%d", f.qIno.Ino, off, size)
 	// A write that directly follows the previous one cannot create a hole, so
 	// the Stat() call inside writePadHole is only needed for all other writes.
 	if !f.isConsecutiveWrite(off) {
 		if padStatus := f.writePadHole(off); !padStatus.Ok() {
 			return 0, padStatus
 		}
 	}
 	written, status := f.doWrite(data, off)
 	if !status.Ok() {
 		return written, status
 	}
 	// Remember where we stopped for the next isConsecutiveWrite check.
 	f.lastOpCount = openfiletable.WriteOpCount()
 	f.lastWrittenOffset = off + int64(size) - 1
 	return written, status
 }
 // Release - FUSE call, close file
 //
 // Takes fdLock exclusively, marks the handle as released, removes it from the
 // open file table and closes the backing file. Calling Release twice on the
 // same handle is a bug and panics. A failing close cannot be reported to the
 // caller (Release has no return value), so it is logged instead of dropped.
 func (f *File2) Release() {
 	f.fdLock.Lock()
 	if f.released {
 		log.Panicf("ino%d fh%d: double release", f.qIno.Ino, f.intFd())
 	}
 	f.released = true
 	openfiletable.Unregister(f.qIno)
 	// Grab the fd number first, it is no longer available after Close().
 	fd := f.intFd()
 	if err := f.fd.Close(); err != nil {
 		tlog.Warn.Printf("ino%d fh%d: Release: close failed: %v", f.qIno.Ino, fd, err)
 	}
 	f.fdLock.Unlock()
 }
 // Flush - FUSE call
 //
 // Flush() may be called once for each dup'd fd, so the file itself must stay
 // open. We only want the flush side effect of close(), which we get by
 // closing a duplicate of the descriptor.
 func (f *File2) Flush() fuse.Status {
 	f.fdLock.RLock()
 	defer f.fdLock.RUnlock()
 	dupFd, err := syscall.Dup(f.intFd())
 	if err == nil {
 		err = syscall.Close(dupFd)
 	}
 	return fuse.ToStatus(err)
 }
 // Fsync FUSE call
 //
 // Calls fsync on the backing file descriptor. "flags" is not used.
 func (f *File2) Fsync(flags int) (code fuse.Status) {
 	f.fdLock.RLock()
 	defer f.fdLock.RUnlock()
 	err := syscall.Fsync(f.intFd())
 	return fuse.ToStatus(err)
 }
 // Chmod FUSE call
 //
 // Goes through syscall.Fchmod rather than os.File.Chmod: the latter passes
 // the mode through the "syscallMode" translation function, which messes up
 // the suid and sgid bits.
 func (f *File2) Chmod(mode uint32) fuse.Status {
 	f.fdLock.RLock()
 	defer f.fdLock.RUnlock()
 	return fuse.ToStatus(syscall.Fchmod(f.intFd(), mode))
 }
 // Chown FUSE call
 //
 // Changes owner and group of the backing file through its open handle.
 func (f *File2) Chown(uid uint32, gid uint32) fuse.Status {
 	f.fdLock.RLock()
 	defer f.fdLock.RUnlock()
 	err := f.fd.Chown(int(uid), int(gid))
 	return fuse.ToStatus(err)
 }
 // GetAttr FUSE call (like stat)
 //
 // Stats the backing file, runs the result through the inode map, converts
 // the ciphertext size to the plaintext size and, if configured, overrides
 // the owner with args.ForceOwner.
 func (f *File2) GetAttr(a *fuse.Attr) fuse.Status {
 	f.fdLock.RLock()
 	defer f.fdLock.RUnlock()
 	tlog.Debug.Printf("file.GetAttr()")
 	var st syscall.Stat_t
 	if err := syscall.Fstat(f.intFd(), &st); err != nil {
 		return fuse.ToStatus(err)
 	}
 	f.rootNode.inoMap.TranslateStat(&st)
 	a.FromStat(&st)
 	// What the user sees is the plaintext size, not the on-disk size.
 	a.Size = f.contentEnc.CipherSizeToPlainSize(a.Size)
 	if owner := f.rootNode.args.ForceOwner; owner != nil {
 		a.Owner = *owner
 	}
 	return fuse.OK
 }
 // Utimens FUSE call
 //
 // Passes the access time "a" and modification time "m" on to
 // syscallcompat.FutimesNano for the backing file descriptor.
 func (f *File2) Utimens(a *time.Time, m *time.Time) fuse.Status {
 	f.fdLock.RLock()
 	defer f.fdLock.RUnlock()
 	return fuse.ToStatus(syscallcompat.FutimesNano(f.intFd(), a, m))
 }
--- a/internal/fusefrontend/file2_allocate_truncate.go
+++ b/internal/fusefrontend/file2_allocate_truncate.go
@ -0,0 +1,217 @@
 package fusefrontend
 // FUSE operations Truncate and Allocate on file handles
 // i.e. ftruncate and fallocate
 import (
 	"log"
 	"syscall"
 	"github.com/hanwen/go-fuse/v2/fuse"
 	"github.com/rfjakob/gocryptfs/internal/syscallcompat"
 	"github.com/rfjakob/gocryptfs/internal/tlog"
 )
 // Allocate - FUSE call for fallocate(2)
 //
 // Two modes are supported; everything else (hole punching, zeroing) is
 // rejected with EOPNOTSUPP:
 //
 //   - mode=FALLOC_FL_KEEP_SIZE is implemented directly.
 //   - mode=FALLOC_DEFAULT is implemented as a two-step process:
 //     (1) Allocate the space using FALLOC_FL_KEEP_SIZE
 //     (2) Set the file size using ftruncate (via truncateGrowFile)
 //
 // Step (2) reuses the file grow mechanics from Truncate because they are
 // complicated and hard to get right.
 func (f *File2) Allocate(off uint64, sz uint64, mode uint32) fuse.Status {
 	supported := mode == FALLOC_DEFAULT || mode == FALLOC_FL_KEEP_SIZE
 	if !supported {
 		// Only tell the user once, not on every call.
 		allocateWarnOnce.Do(func() {
 			tlog.Info.Printf("fallocate: only mode 0 (default) and 1 (keep size) are supported")
 		})
 		return fuse.Status(syscall.EOPNOTSUPP)
 	}
 	f.fdLock.RLock()
 	defer f.fdLock.RUnlock()
 	if f.released {
 		return fuse.EBADF
 	}
 	entry := f.fileTableEntry
 	entry.ContentLock.Lock()
 	defer entry.ContentLock.Unlock()
 	blocks := f.contentEnc.ExplodePlainRange(off, sz)
 	first, last := blocks[0], blocks[len(blocks)-1]
 	// Step (1): Allocate the ciphertext range covering the first to the last
 	// affected block using FALLOC_FL_KEEP_SIZE. This fills file holes and/or
 	// allocates additional space past the end of the file.
 	cipherOff := first.BlockCipherOff()
 	cipherSz := last.BlockCipherOff() - cipherOff +
 		f.contentEnc.BlockOverhead() + last.Skip + last.Length
 	err := syscallcompat.Fallocate(f.intFd(), FALLOC_FL_KEEP_SIZE, int64(cipherOff), int64(cipherSz))
 	tlog.Debug.Printf("Allocate off=%d sz=%d mode=%x cipherOff=%d cipherSz=%d\n",
 		off, sz, mode, cipherOff, cipherSz)
 	if err != nil {
 		return fuse.ToStatus(err)
 	}
 	// The user did not want to change the apparent size. We are done.
 	if mode == FALLOC_FL_KEEP_SIZE {
 		return fuse.OK
 	}
 	// Step (2): Grow the apparent file size.
 	// The old size tells us whether the file grows at all.
 	newPlainSz := off + sz
 	oldPlainSz, err := f.statPlainSize()
 	if err != nil {
 		return fuse.ToStatus(err)
 	}
 	// Fallocate with mode = 0 never truncates a file, so a new size that is
 	// smaller than (or equal to) the old one leaves nothing to do.
 	if newPlainSz <= oldPlainSz {
 		return fuse.OK
 	}
 	// The file grows. The space has already been allocated in (1), what is
 	// left is padding the first and last block and calling truncate, which is
 	// exactly what truncateGrowFile does.
 	return f.truncateGrowFile(oldPlainSz, newPlainSz)
 }
 // Truncate - FUSE call
 //
 // Sets the plaintext size of the file to "newSize".
 //
 //   - newSize == 0: the backing file is truncated to zero, which also removes
 //     the file header, so the cached file ID is dropped.
 //   - newSize == old size: nothing to do.
 //   - newSize > old size: handled by truncateGrowFile.
 //   - newSize < old size: the backing file is cut at the start of the block
 //     that contains the new end of file, and the remaining partial block (if
 //     any) is read beforehand and re-written afterwards through doWrite.
 //
 // Returns EBADF if the handle has already been released.
 func (f *File2) Truncate(newSize uint64) fuse.Status {
 	f.fdLock.RLock()
 	defer f.fdLock.RUnlock()
 	if f.released {
 		// The file descriptor has been closed concurrently.
 		tlog.Warn.Printf("ino%d fh%d: Truncate on released file", f.qIno.Ino, f.intFd())
 		return fuse.EBADF
 	}
 	f.fileTableEntry.ContentLock.Lock()
 	defer f.fileTableEntry.ContentLock.Unlock()
 	var err error
 	// Common case first: Truncate to zero
 	if newSize == 0 {
 		err = syscall.Ftruncate(f.intFd(), 0)
 		if err != nil {
 			tlog.Warn.Printf("ino%d fh%d: Ftruncate(fd, 0) returned error: %v", f.qIno.Ino, f.intFd(), err)
 			return fuse.ToStatus(err)
 		}
 		// Truncate to zero kills the file header
 		f.fileTableEntry.ID = nil
 		return fuse.OK
 	}
 	// We need the old file size to determine if we are growing or shrinking
 	// the file
 	oldSize, err := f.statPlainSize()
 	if err != nil {
 		return fuse.ToStatus(err)
 	}
 	// Fractional block counts, only used for the debug message.
 	oldB := float32(oldSize) / float32(f.contentEnc.PlainBS())
 	newB := float32(newSize) / float32(f.contentEnc.PlainBS())
 	tlog.Debug.Printf("ino%d: FUSE Truncate from %.2f to %.2f blocks (%d to %d bytes)", f.qIno.Ino, oldB, newB, oldSize, newSize)
 	// File size stays the same - nothing to do
 	if newSize == oldSize {
 		return fuse.OK
 	}
 	// File grows
 	if newSize > oldSize {
 		return f.truncateGrowFile(oldSize, newSize)
 	}
 	// File shrinks
 	blockNo := f.contentEnc.PlainOffToBlockNo(newSize)
 	cipherOff := f.contentEnc.BlockNoToCipherOff(blockNo)
 	plainOff := f.contentEnc.BlockNoToPlainOff(blockNo)
 	// Number of plaintext bytes that remain in the (new) last block.
 	lastBlockLen := newSize - plainOff
 	var data []byte
 	if lastBlockLen > 0 {
 		// Save the part of the last block that survives the truncate.
 		var status fuse.Status
 		data, status = f.doRead(nil, plainOff, lastBlockLen)
 		if status != fuse.OK {
 			// Log the status of the failed read. "err" is nil at this point.
 			tlog.Warn.Printf("Truncate: shrink doRead returned error: %v", status)
 			return status
 		}
 	}
 	// Truncate down to the last complete block
 	err = syscall.Ftruncate(f.intFd(), int64(cipherOff))
 	if err != nil {
 		tlog.Warn.Printf("Truncate: shrink Ftruncate returned error: %v", err)
 		return fuse.ToStatus(err)
 	}
 	// Append partial block
 	if lastBlockLen > 0 {
 		_, status := f.doWrite(data, int64(plainOff))
 		return status
 	}
 	return fuse.OK
 }
 // statPlainSize stats the backing file and converts its (ciphertext) size
 // to the plaintext size. A failing stat is logged and returned.
 func (f *File2) statPlainSize() (uint64, error) {
 	info, err := f.fd.Stat()
 	if err != nil {
 		tlog.Warn.Printf("ino%d fh%d: statPlainSize: %v", f.qIno.Ino, f.intFd(), err)
 		return 0, err
 	}
 	onDisk := uint64(info.Size())
 	return uint64(f.contentEnc.CipherSizeToPlainSize(onDisk)), nil
 }
 // truncateGrowFile grows the file from plaintext size oldPlainSz to
 // newPlainSz, performing RMW on the first and last block as necessary.
 // New blocks in the middle become file holes unless they have been
 // fallocate()'d beforehand.
 //
 // Calling it with newPlainSz <= oldPlainSz is a bug and panics.
 func (f *File2) truncateGrowFile(oldPlainSz uint64, newPlainSz uint64) fuse.Status {
 	if newPlainSz <= oldPlainSz {
 		log.Panicf("BUG: newSize=%d <= oldSize=%d", newPlainSz, oldPlainSz)
 	}
 	lastByteOff := newPlainSz - 1
 	// Writes a single zero byte at the new end of file and lets doWrite
 	// figure out the RMW.
 	writeLastByte := func() fuse.Status {
 		_, st := f.doWrite(make([]byte, 1), int64(lastByteOff))
 		return st
 	}
 	if oldPlainSz > 0 {
 		oldLastBlock := f.contentEnc.PlainOffToBlockNo(oldPlainSz - 1)
 		newLastBlock := f.contentEnc.PlainOffToBlockNo(lastByteOff)
 		// The file is grown within one block, no need to pad anything.
 		if oldLastBlock == newLastBlock {
 			return writeLastByte()
 		}
 	}
 	// From here on the truncate creates at least one new block.
 	//
 	// Pad the old last block to the block boundary. This is a no-op if it is
 	// already block-aligned.
 	if st := f.zeroPad(oldPlainSz); !st.Ok() {
 		return st
 	}
 	// The new size is NOT block-aligned, so a partial block has to be written.
 	if newPlainSz%f.contentEnc.PlainBS() != 0 {
 		return writeLastByte()
 	}
 	// The new size is block-aligned. In this case we can do everything
 	// ourselves and avoid the call to doWrite.
 	if oldPlainSz == 0 {
 		// The file was empty, so it did not have a header. Create one.
 		id, err := f.createHeader()
 		if err != nil {
 			return fuse.ToStatus(err)
 		}
 		f.fileTableEntry.ID = id
 	}
 	cipherSz := int64(f.contentEnc.PlainSizeToCipherSize(newPlainSz))
 	err := syscall.Ftruncate(f.intFd(), cipherSz)
 	if err != nil {
 		tlog.Warn.Printf("Truncate: grow Ftruncate returned error: %v", err)
 	}
 	return fuse.ToStatus(err)
 }
--- a/internal/fusefrontend/file2_holes.go
+++ b/internal/fusefrontend/file2_holes.go
@ -0,0 +1,92 @@
 package fusefrontend
 // Helper functions for sparse files (files with holes)
 import (
 	"runtime"
 	"syscall"
 	"github.com/hanwen/go-fuse/v2/fuse"
 	"github.com/rfjakob/gocryptfs/internal/tlog"
 )
 // writePadHole checks whether a write to plaintext offset "targetOff" would
 // create a file hole in the ciphertext. If so, the current last block is
 // zero-padded to the block boundary.
 func (f *File2) writePadHole(targetOff int64) fuse.Status {
 	// The current file size tells us where the file ends.
 	info, err := f.fd.Stat()
 	if err != nil {
 		tlog.Warn.Printf("checkAndPadHole: Fstat failed: %v", err)
 		return fuse.ToStatus(err)
 	}
 	plainSize := f.contentEnc.CipherSizeToPlainSize(uint64(info.Size()))
 	// Block that an append of a single byte (a write to offset=plainSize)
 	// would land in.
 	appendBlock := f.contentEnc.PlainOffToBlockNo(plainSize)
 	// Block the user actually wants to write to.
 	writeBlock := f.contentEnc.PlainOffToBlockNo(uint64(targetOff))
 	if writeBlock > appendBlock {
 		// The write goes past the append block. That block has to be
 		// zero-padded to the block boundary, and (at least) the block after
 		// it will contain a file hole in the ciphertext.
 		return f.zeroPad(plainSize)
 	}
 	// The write goes into an existing block or (if the last block was full)
 	// starts a new one directly after the last block. Nothing to do.
 	return fuse.OK
 }
 // zeroPad appends zero bytes to a file of plaintext size "plainSize" until
 // the next block boundary is reached. Already block-aligned files are left
 // untouched.
 func (f *File2) zeroPad(plainSize uint64) fuse.Status {
 	blockSize := f.contentEnc.PlainBS()
 	used := plainSize % blockSize
 	if used == 0 {
 		// Already block-aligned
 		return fuse.OK
 	}
 	padLen := blockSize - used
 	tlog.Debug.Printf("zeroPad: Writing %d bytes\n", padLen)
 	_, status := f.doWrite(make([]byte, padLen), int64(plainSize))
 	return status
 }
 // SeekData calls the lseek syscall with SEEK_DATA on the backing file and
 // returns the plaintext offset of the next data bytes at or after
 // "oldOffset", skipping over file holes. Only implemented on Linux, other
 // platforms get EOPNOTSUPP.
 func (f *File2) SeekData(oldOffset int64) (int64, error) {
 	if runtime.GOOS != "linux" {
 		// Does MacOS support something like this?
 		return 0, syscall.EOPNOTSUPP
 	}
 	// "whence" value for SEEK_DATA.
 	const seekData = 3
 	enc := f.contentEnc
 	// Translate the plaintext offset into the ciphertext offset of the start
 	// of its block. File holes smaller than a full block will be ignored.
 	startBlock := enc.PlainOffToBlockNo(uint64(oldOffset))
 	startCipherOff := int64(enc.BlockNoToCipherOff(startBlock))
 	// If the old offset points to (or beyond) the end of the file, the Seek
 	// syscall fails with syscall.ENXIO.
 	dataCipherOff, err := syscall.Seek(f.intFd(), startCipherOff, seekData)
 	if err != nil {
 		return 0, err
 	}
 	// Translate back to a plaintext offset, rounding down to the block start.
 	// At this point, dataCipherOff should always be >= contentenc.HeaderLen.
 	dataBlock := enc.CipherOffToBlockNo(uint64(dataCipherOff))
 	newOffset := int64(enc.BlockNoToPlainOff(dataBlock))
 	// Never go backwards relative to the initial offset (to avoid endless loops).
 	if newOffset < oldOffset {
 		return oldOffset, nil
 	}
 	return newOffset, nil
 }
--- a/internal/fusefrontend/node.go
+++ b/internal/fusefrontend/node.go
@ -2,6 +2,7 @@ package fusefrontend
 import (
 	"context"
 	"os"
 	"path/filepath"
 	"syscall"
@ -10,7 +11,9 @@ import (
 	"github.com/hanwen/go-fuse/v2/fs"
 	"github.com/hanwen/go-fuse/v2/fuse"
 	"github.com/rfjakob/gocryptfs/internal/nametransform"
 	"github.com/rfjakob/gocryptfs/internal/syscallcompat"
 	"github.com/rfjakob/gocryptfs/internal/tlog"
 )
 // Node is a file or directory in the filesystem tree
@ -31,6 +34,9 @@ func (n *Node) rootNode() *RootNode {
 func (n *Node) Lookup(ctx context.Context, name string, out *fuse.EntryOut) (*fs.Inode, syscall.Errno) {
 	rn := n.rootNode()
 	p := filepath.Join(n.path(), name)
 	if rn.isFiltered(p) {
 		return nil, syscall.EPERM
 	}
 	dirfd, cName, err := rn.openBackingDir(p)
 	if err != nil {
 		return nil, fs.ToErrno(err)
@ -71,3 +77,68 @@ func (n *Node) Getattr(ctx context.Context, f fs.FileHandle, out *fuse.AttrOut)
 	out.Attr.FromStat(st)
 	return 0
 }
 // Create - FUSE call. Creates the file "name" in directory "n" and opens it.
 //
 // The backing file is always created with O_CREAT|O_EXCL on top of the
 // mangled open flags. For long file names the ".name" file is written first
 // and deleted again if creating the content file fails.
 //
 // On success it returns the new child inode and a File2 handle for the open
 // file; "out" receives the attributes of the new file. On failure errno is
 // set and no file descriptor is left open.
 func (n *Node) Create(ctx context.Context, name string, flags uint32, mode uint32, out *fuse.EntryOut) (inode *fs.Inode, fh fs.FileHandle, fuseFlags uint32, errno syscall.Errno) {
 	rn := n.rootNode()
 	path := filepath.Join(n.path(), name)
 	if rn.isFiltered(path) {
 		return nil, nil, 0, syscall.EPERM
 	}
 	dirfd, cName, err := rn.openBackingDir(path)
 	if err != nil {
 		return nil, nil, 0, fs.ToErrno(err)
 	}
 	defer syscall.Close(dirfd)
 	fd := -1
 	// The open call only gets the caller's context if we want to preserve the
 	// owner. "ctx" itself is left alone so that NewInode below never receives
 	// a nil context.
 	openCtx := ctx
 	if !rn.args.PreserveOwner {
 		openCtx = nil
 	}
 	newFlags := rn.mangleOpenFlags(flags)
 	// Handle long file name
 	if !rn.args.PlaintextNames && nametransform.IsLongContent(cName) {
 		// Create ".name"
 		err = rn.nameTransform.WriteLongNameAt(dirfd, cName, path)
 		if err != nil {
 			return nil, nil, 0, fs.ToErrno(err)
 		}
 		// Create content
 		fd, err = syscallcompat.OpenatUserCtx(dirfd, cName, newFlags|syscall.O_CREAT|syscall.O_EXCL, mode, openCtx)
 		if err != nil {
 			// Best-effort cleanup of the ".name" file we just created.
 			nametransform.DeleteLongNameAt(dirfd, cName)
 		}
 	} else {
 		// Create content, normal (short) file name
 		fd, err = syscallcompat.OpenatUserCtx(dirfd, cName, newFlags|syscall.O_CREAT|syscall.O_EXCL, mode, openCtx)
 	}
 	if err != nil {
 		// xfstests generic/488 triggers this
 		if err == syscall.EMFILE {
 			var lim syscall.Rlimit
 			syscall.Getrlimit(syscall.RLIMIT_NOFILE, &lim)
 			tlog.Warn.Printf("Create %q: too many open files. Current \"ulimit -n\": %d", cName, lim.Cur)
 		}
 		return nil, nil, 0, fs.ToErrno(err)
 	}
 	// Get device number and inode number into `st`
 	st, err := syscallcompat.Fstatat2(dirfd, cName, unix.AT_SYMLINK_NOFOLLOW)
 	if err != nil {
 		// The file is already open at this point. Close it again, otherwise
 		// the descriptor would leak on this error path.
 		syscall.Close(fd)
 		return nil, nil, 0, fs.ToErrno(err)
 	}
 	// Get unique inode number
 	rn.inoMap.TranslateStat(st)
 	out.Attr.FromStat(st)
 	// Create child node
 	id := fs.StableAttr{
 		Mode: uint32(st.Mode),
 		Gen:  1,
 		Ino:  st.Ino,
 	}
 	node := &Node{}
 	ch := n.NewInode(ctx, node, id)
 	// Ownership of fd passes to the os.File, and with it to the File2 handle.
 	f := os.NewFile(uintptr(fd), cName)
 	return ch, NewFile2(f, rn, st), 0, 0
 }
--- a/internal/fusefrontend/root_node.go
+++ b/internal/fusefrontend/root_node.go
@ -1,11 +1,16 @@
 package fusefrontend
 import (
 	"os"
 	"sync/atomic"
 	"syscall"
 	"time"
 	"github.com/rfjakob/gocryptfs/internal/configfile"
 	"github.com/rfjakob/gocryptfs/internal/contentenc"
 	"github.com/rfjakob/gocryptfs/internal/inomap"
 	"github.com/rfjakob/gocryptfs/internal/nametransform"
 	"github.com/rfjakob/gocryptfs/internal/syscallcompat"
 	"github.com/rfjakob/gocryptfs/internal/tlog"
 )
@ -47,6 +52,30 @@ func NewRootNode(args Args, c *contentenc.ContentEnc, n nametransform.NameTransf
 	}
 }
 // mangleOpenFlags is used by Create() and Open() to convert the open flags the user
 // wants to the flags we internally use to open the backing file:
 // write-only access becomes read-write, O_APPEND, O_DIRECT and O_CREAT are
 // removed, and O_NOFOLLOW is always set.
 func (rn *RootNode) mangleOpenFlags(flags uint32) (newFlags int) {
 	newFlags = int(flags)
 	// We always need read access to do read-modify-write cycles, so WRONLY is
 	// upgraded to RDWR.
 	if newFlags&syscall.O_ACCMODE == syscall.O_WRONLY {
 		newFlags = (newFlags &^ os.O_WRONLY) | os.O_RDWR
 	}
 	// Flags that must not reach the backing file:
 	//   O_APPEND - we need to seek back for RMW, which append mode forbids.
 	//   O_DIRECT - accesses must be aligned in both offset and length. Due to
 	//              our crypto header, alignment will be off, even if userspace
 	//              makes aligned accesses. Running xfstests generic/013 on ext4
 	//              used to trigger lots of EINVAL errors due to missing
 	//              alignment. Just fall back to buffered IO.
 	//   O_CREAT  - Create and Open are two separate FUSE operations, so
 	//              O_CREAT should not be part of the open flags.
 	strip := os.O_APPEND | syscallcompat.O_DIRECT | syscall.O_CREAT
 	newFlags &^= strip
 	// We always want O_NOFOLLOW to be safe against symlink races
 	newFlags |= syscall.O_NOFOLLOW
 	return newFlags
 }
 // reportMitigatedCorruption is used to report a corruption that was transparently
 // mitigated and did not return an error to the user. Pass the name of the corrupt
 // item (filename for OpenDir(), xattr name for ListXAttr() etc).
@ -63,3 +92,23 @@ func (rn *RootNode) reportMitigatedCorruption(item string) {
 		return
 	}
 }
 // isFiltered reports whether the plaintext "path" is forbidden.
 //
 // This prevents name clashes with internal files when file names are not
 // encrypted: with PlaintextNames set, gocryptfs.conf in the root directory
 // is reserved. gocryptfs.diriv is NOT forbidden because diriv and
 // plaintextnames are exclusive.
 // As a side effect, every call clears the IsIdle flag of the root node.
 func (rn *RootNode) isFiltered(path string) bool {
 	atomic.StoreUint32(&rn.IsIdle, 0)
 	reserved := rn.args.PlaintextNames && path == configfile.ConfDefaultName
 	if reserved {
 		tlog.Info.Printf("The name /%s is reserved when -plaintextnames is used\n",
 			configfile.ConfDefaultName)
 	}
 	return reserved
 }
--- a/internal/syscallcompat/sys_linux.go
+++ b/internal/syscallcompat/sys_linux.go
@ -2,6 +2,7 @@
 package syscallcompat
 import (
 	"context"
 	"fmt"
 	"io/ioutil"
 	"runtime"
@ -88,6 +89,20 @@ func getSupplementaryGroups(pid uint32) (gids []int) {
 	return nil
 }
 // OpenatUserCtx is a tries to extract a fuse.Context from the generic ctx and
 // calls OpenatUser.
 func OpenatUserCtx(dirfd int, path string, flags int, mode uint32, ctx context.Context) (fd int, err error) {
 	var ctx2 *fuse.Context
 	if ctx != nil {
 		if caller, ok := fuse.FromContext(ctx); ok {
 			ctx2 = &fuse.Context{
 				Caller: *caller,
 			}
 		}
 	}
 	return OpenatUser(dirfd, path, flags, mode, ctx2)
 }
 // OpenatUser runs the Openat syscall in the context of a different user.
 func OpenatUser(dirfd int, path string, flags int, mode uint32, context *fuse.Context) (fd int, err error) {
 	if context != nil {