c943ed32aa
The allocations from Lgetxattr were #1 in the tar extract allocation profile (caused by security.capability lookups). No more! $ benchstat old.txt new.txt name old time/op new time/op delta Lgetxattr-4 15.2µs ± 0% 1.8µs ± 0% ~ (p=1.000 n=1+1) $ ./benchmark.bash Testing gocryptfs at /tmp/benchmark.bash.H8p: gocryptfs v2.0-beta1-4-g95ea738-dirty; go-fuse v2.0.4-0.20200908172753-0b6cbc515082 => github.com/rfjakob/go-fuse/v2 v2.0.4-0.20201015204057-88b12c99f8af; 2020-10-18 go1.15.3 linux/amd64 /tmp/benchmark.bash.H8p.mnt is a mountpoint WRITE: 262144000 bytes (262 MB, 250 MiB) copied, 0,520109 s, 504 MB/s READ: 262144000 bytes (262 MB, 250 MiB) copied, 0,255672 s, 1,0 GB/s UNTAR: 30,238 MD5: 12,721 LS: 10,038 RM: 16,536
262 lines
7.6 KiB
Go
262 lines
7.6 KiB
Go
package syscallcompat
|
|
|
|
import (
|
|
"bytes"
|
|
"syscall"
|
|
|
|
"golang.org/x/sys/unix"
|
|
|
|
"github.com/rfjakob/gocryptfs/internal/tlog"
|
|
)
|
|
|
|
// PATH_MAX is the maximum allowed path length on Linux.
|
|
// It is not defined on Darwin, so we use the Linux value.
|
|
const PATH_MAX = 4096
|
|
|
|
// Readlinkat is a convenience wrapper around unix.Readlinkat() that takes
|
|
// care of buffer sizing. Implemented like os.Readlink().
|
|
func Readlinkat(dirfd int, path string) (string, error) {
|
|
// Allocate the buffer exponentially like os.Readlink does.
|
|
for bufsz := 128; ; bufsz *= 2 {
|
|
buf := make([]byte, bufsz)
|
|
n, err := unix.Readlinkat(dirfd, path, buf)
|
|
if err != nil {
|
|
return "", err
|
|
}
|
|
if n < bufsz {
|
|
return string(buf[0:n]), nil
|
|
}
|
|
}
|
|
}
|
|
|
|
// Faccessat exists both in Linux and in MacOS 10.10+, but the Linux version
|
|
// DOES NOT support any flags. Emulate AT_SYMLINK_NOFOLLOW like glibc does.
|
|
func Faccessat(dirfd int, path string, mode uint32) error {
|
|
var st unix.Stat_t
|
|
err := Fstatat(dirfd, path, &st, unix.AT_SYMLINK_NOFOLLOW)
|
|
if err != nil {
|
|
return err
|
|
}
|
|
if st.Mode&syscall.S_IFMT == syscall.S_IFLNK {
|
|
// Pretend that a symlink is always accessible
|
|
return nil
|
|
}
|
|
return unix.Faccessat(dirfd, path, mode, 0)
|
|
}
|
|
|
|
// Openat wraps the Openat syscall.
|
|
// Retries on EINTR.
|
|
func Openat(dirfd int, path string, flags int, mode uint32) (fd int, err error) {
|
|
if flags&syscall.O_CREAT != 0 {
|
|
// O_CREAT should be used with O_EXCL. O_NOFOLLOW has no effect with O_EXCL.
|
|
if flags&syscall.O_EXCL == 0 {
|
|
tlog.Warn.Printf("Openat: O_CREAT without O_EXCL: flags = %#x", flags)
|
|
flags |= syscall.O_EXCL
|
|
}
|
|
} else {
|
|
// If O_CREAT is not used, we should use O_NOFOLLOW
|
|
if flags&syscall.O_NOFOLLOW == 0 {
|
|
tlog.Warn.Printf("Openat: O_NOFOLLOW missing: flags = %#x", flags)
|
|
flags |= syscall.O_NOFOLLOW
|
|
}
|
|
}
|
|
fd, err = retryEINTR2(func() (int, error) {
|
|
return unix.Openat(dirfd, path, flags, mode)
|
|
})
|
|
return fd, err
|
|
}
|
|
|
|
// Renameat wraps the Renameat syscall.
|
|
// Retries on EINTR.
|
|
func Renameat(olddirfd int, oldpath string, newdirfd int, newpath string) (err error) {
|
|
err = retryEINTR(func() error {
|
|
return unix.Renameat(olddirfd, oldpath, newdirfd, newpath)
|
|
})
|
|
return err
|
|
}
|
|
|
|
// Unlinkat syscall.
|
|
// Retries on EINTR.
|
|
func Unlinkat(dirfd int, path string, flags int) (err error) {
|
|
err = retryEINTR(func() error {
|
|
return unix.Unlinkat(dirfd, path, flags)
|
|
})
|
|
return err
|
|
}
|
|
|
|
// Fchownat syscall.
|
|
func Fchownat(dirfd int, path string, uid int, gid int, flags int) (err error) {
|
|
// Why would we ever want to call this without AT_SYMLINK_NOFOLLOW?
|
|
if flags&unix.AT_SYMLINK_NOFOLLOW == 0 {
|
|
tlog.Warn.Printf("Fchownat: adding missing AT_SYMLINK_NOFOLLOW flag")
|
|
flags |= unix.AT_SYMLINK_NOFOLLOW
|
|
}
|
|
return unix.Fchownat(dirfd, path, uid, gid, flags)
|
|
}
|
|
|
|
// Linkat exists both in Linux and in MacOS 10.10+.
|
|
func Linkat(olddirfd int, oldpath string, newdirfd int, newpath string, flags int) (err error) {
|
|
return unix.Linkat(olddirfd, oldpath, newdirfd, newpath, flags)
|
|
}
|
|
|
|
// Symlinkat syscall.
|
|
func Symlinkat(oldpath string, newdirfd int, newpath string) (err error) {
|
|
return unix.Symlinkat(oldpath, newdirfd, newpath)
|
|
}
|
|
|
|
// Mkdirat syscall.
|
|
func Mkdirat(dirfd int, path string, mode uint32) (err error) {
|
|
return unix.Mkdirat(dirfd, path, mode)
|
|
}
|
|
|
|
// Fstatat syscall.
|
|
// Retries on EINTR.
|
|
func Fstatat(dirfd int, path string, stat *unix.Stat_t, flags int) (err error) {
|
|
// Why would we ever want to call this without AT_SYMLINK_NOFOLLOW?
|
|
if flags&unix.AT_SYMLINK_NOFOLLOW == 0 {
|
|
tlog.Warn.Printf("Fstatat: adding missing AT_SYMLINK_NOFOLLOW flag")
|
|
flags |= unix.AT_SYMLINK_NOFOLLOW
|
|
}
|
|
err = retryEINTR(func() error {
|
|
return unix.Fstatat(dirfd, path, stat, flags)
|
|
})
|
|
return err
|
|
}
|
|
|
|
// Fstatat2 is a more convenient version of Fstatat. It allocates a Stat_t
|
|
// for you and also handles the Unix2syscall conversion.
|
|
// Retries on EINTR.
|
|
func Fstatat2(dirfd int, path string, flags int) (*syscall.Stat_t, error) {
|
|
var stUnix unix.Stat_t
|
|
err := Fstatat(dirfd, path, &stUnix, flags)
|
|
if err != nil {
|
|
return nil, err
|
|
}
|
|
st := Unix2syscall(stUnix)
|
|
return &st, nil
|
|
}
|
|
|
|
const XATTR_SIZE_MAX = 65536
|
|
|
|
// Make the buffer 1kB bigger so we can detect overflows. Unfortunately,
|
|
// slices larger than 64kB are always allocated on the heap.
|
|
const XATTR_BUFSZ = XATTR_SIZE_MAX + 1024
|
|
|
|
// We try with a small buffer first - this one can be allocated on the stack.
|
|
const XATTR_BUFSZ_SMALL = 500
|
|
|
|
// Fgetxattr is a wrapper around unix.Fgetxattr that handles the buffer sizing.
|
|
func Fgetxattr(fd int, attr string) (val []byte, err error) {
|
|
fn := func(buf []byte) (int, error) {
|
|
return unix.Fgetxattr(fd, attr, buf)
|
|
}
|
|
return getxattrSmartBuf(fn)
|
|
}
|
|
|
|
// Lgetxattr is a wrapper around unix.Lgetxattr that handles the buffer sizing.
|
|
func Lgetxattr(path string, attr string) (val []byte, err error) {
|
|
fn := func(buf []byte) (int, error) {
|
|
return unix.Lgetxattr(path, attr, buf)
|
|
}
|
|
return getxattrSmartBuf(fn)
|
|
}
|
|
|
|
func getxattrSmartBuf(fn func(buf []byte) (int, error)) ([]byte, error) {
|
|
// Fastpaths. Important for security.capabilities, which gets queried a lot.
|
|
buf := make([]byte, XATTR_BUFSZ_SMALL)
|
|
sz, err := fn(buf)
|
|
// Non-existing xattr
|
|
if err == unix.ENODATA {
|
|
return nil, err
|
|
}
|
|
// Underlying fs does not support security.capabilities (example: tmpfs)
|
|
if err == unix.EOPNOTSUPP {
|
|
return nil, err
|
|
}
|
|
// Small xattr
|
|
if err == nil && sz < len(buf) {
|
|
goto out
|
|
}
|
|
// Generic slowpath
|
|
//
|
|
// If the buffer is too small to fit the value, Linux and MacOS react
|
|
// differently:
|
|
// Linux: returns an ERANGE error and "-1" bytes.
|
|
// MacOS: truncates the value and returns "size" bytes.
|
|
//
|
|
// We choose the simple approach of buffer that is bigger than the limit on
|
|
// Linux, and return an error for everything that is bigger (which can
|
|
// only happen on MacOS).
|
|
buf = make([]byte, XATTR_BUFSZ)
|
|
sz, err = fn(buf)
|
|
if err == syscall.ERANGE {
|
|
// Do NOT return ERANGE - the user might retry ad inifinitum!
|
|
return nil, syscall.EOVERFLOW
|
|
}
|
|
if err != nil {
|
|
return nil, err
|
|
}
|
|
if sz >= XATTR_SIZE_MAX {
|
|
return nil, syscall.EOVERFLOW
|
|
}
|
|
out:
|
|
// Copy only the actually used bytes to a new (smaller) buffer
|
|
// so "buf" never leaves the function and can be allocated on the stack.
|
|
val := make([]byte, sz)
|
|
copy(val, buf)
|
|
return val, nil
|
|
}
|
|
|
|
// Flistxattr is a wrapper for unix.Flistxattr that handles buffer sizing and
|
|
// parsing the returned blob to a string slice.
|
|
func Flistxattr(fd int) (attrs []string, err error) {
|
|
// See the buffer sizing comments in getxattrSmartBuf.
|
|
// TODO: smarter buffer sizing?
|
|
buf := make([]byte, XATTR_BUFSZ)
|
|
sz, err := unix.Flistxattr(fd, buf)
|
|
if err == syscall.ERANGE {
|
|
// Do NOT return ERANGE - the user might retry ad inifinitum!
|
|
return nil, syscall.EOVERFLOW
|
|
}
|
|
if err != nil {
|
|
return nil, err
|
|
}
|
|
if sz >= XATTR_SIZE_MAX {
|
|
return nil, syscall.EOVERFLOW
|
|
}
|
|
attrs = parseListxattrBlob(buf[:sz])
|
|
return attrs, nil
|
|
}
|
|
|
|
// Llistxattr is a wrapper for unix.Llistxattr that handles buffer sizing and
|
|
// parsing the returned blob to a string slice.
|
|
func Llistxattr(path string) (attrs []string, err error) {
|
|
// TODO: smarter buffer sizing?
|
|
buf := make([]byte, XATTR_BUFSZ)
|
|
sz, err := unix.Llistxattr(path, buf)
|
|
if err == syscall.ERANGE {
|
|
// Do NOT return ERANGE - the user might retry ad inifinitum!
|
|
return nil, syscall.EOVERFLOW
|
|
}
|
|
if err != nil {
|
|
return nil, err
|
|
}
|
|
if sz >= XATTR_SIZE_MAX {
|
|
return nil, syscall.EOVERFLOW
|
|
}
|
|
attrs = parseListxattrBlob(buf[:sz])
|
|
return attrs, nil
|
|
}
|
|
|
|
func parseListxattrBlob(buf []byte) (attrs []string) {
|
|
parts := bytes.Split(buf, []byte{0})
|
|
for _, part := range parts {
|
|
if len(part) == 0 {
|
|
// Last part is empty, ignore
|
|
continue
|
|
}
|
|
attrs = append(attrs, string(part))
|
|
}
|
|
return attrs
|
|
}
|