From c1d7e38761d35149f19524fa19d3afaaca73f302 Mon Sep 17 00:00:00 2001 From: Jakob Unterwurzacher Date: Sat, 29 May 2021 16:00:40 +0200 Subject: [PATCH] findholes: add --create, --verify Also, change the logic for the segment walk to not rely on the total size. cp does not use the total size either, and we miss bugs by cheating! --- contrib/findholes/holes/holes.go | 119 +++++++++++++++++++++++++++---- contrib/findholes/main.go | 25 ++++++- 2 files changed, 130 insertions(+), 14 deletions(-) diff --git a/contrib/findholes/holes/holes.go b/contrib/findholes/holes/holes.go index a2c16de..7d77ae1 100644 --- a/contrib/findholes/holes/holes.go +++ b/contrib/findholes/holes/holes.go @@ -4,7 +4,11 @@ package holes import ( "fmt" + "log" + "math/rand" + "os" "syscall" + "time" ) const ( @@ -16,6 +20,19 @@ const ( SegmentEOF = SegmentType(102) ) +type Whence int + +func (w Whence) String() string { + switch w { + case SEEK_DATA: + return "SEEK_DATA" + case SEEK_HOLE: + return "SEEK_HOLE" + default: + return "???" + } +} + type Segment struct { Offset int64 Type SegmentType @@ -42,8 +59,11 @@ func (s SegmentType) String() string { // PrettyPrint pretty-prints the Segments. 
func PrettyPrint(segments []Segment) (out string) { - for _, s := range segments { - out += "\n" + s.String() + for i, s := range segments { + out += s.String() + if i < len(segments)-1 { + out += "\n" + } } return out } @@ -62,6 +82,7 @@ func Find(fd int) (segments []Segment, err error) { // find out if file starts with data or hole off, err := syscall.Seek(fd, 0, SEEK_DATA) + // starts with hole and has no data if err == syscall.ENXIO { segments = append(segments, Segment{0, SegmentHole}, @@ -71,36 +92,108 @@ func Find(fd int) (segments []Segment, err error) { if err != nil { return nil, err } + // starts with data if off == cursor { segments = append(segments, Segment{0, SegmentData}) } else { + // starts with hole segments = append(segments, Segment{0, SegmentHole}, - Segment{totalSize, SegmentData}) + Segment{off, SegmentData}) cursor = off } // now we are at the start of data. // find next hole, then next data, then next hole, then next data... for { - cursor, err = syscall.Seek(fd, cursor, SEEK_HOLE) + oldCursor := cursor + // Next hole + off, err = syscall.Seek(fd, cursor, SEEK_HOLE) if err != nil { return nil, err } - if cursor == totalSize { - segments = append(segments, Segment{cursor, SegmentEOF}) - break + segments = append(segments, Segment{off, SegmentHole}) + cursor = off + + // Next data + off, err := syscall.Seek(fd, cursor, SEEK_DATA) + // No more data? 
+ if err == syscall.ENXIO { + segments = append(segments, + Segment{totalSize, SegmentEOF}) + return segments, nil } - segments = append(segments, Segment{cursor, SegmentHole}) - cursor, err = syscall.Seek(fd, cursor, SEEK_DATA) if err != nil { return nil, err } - if cursor == totalSize { - segments = append(segments, Segment{cursor, SegmentEOF}) - break + segments = append(segments, Segment{off, SegmentData}) + cursor = off + + if oldCursor == cursor { + return nil, fmt.Errorf("%s\nerror: seek loop!", PrettyPrint(segments)) } - segments = append(segments, Segment{cursor, SegmentData}) } return segments, nil } + +// Verify the given `segments` using a full bytewise file scan +func Verify(fd int, segments []Segment) (err error) { + last := segments[len(segments)-1] + if last.Type != SegmentEOF { + log.Panicf("BUG: last segment is not EOF. segments: %v", segments) + } + + for i, s := range segments { + var whence int + switch s.Type { + case SegmentHole: + whence = SEEK_HOLE + case SegmentData: + whence = SEEK_DATA + case SegmentEOF: + continue + default: + log.Panicf("BUG: unkown segment type %d", s.Type) + } + for off := s.Offset; off < segments[i+1].Offset; off++ { + res, err := syscall.Seek(fd, off, whence) + if err != nil { + return fmt.Errorf("error: seek(%d, %s) returned error %v", off, Whence(whence).String(), err) + } + if res != off { + return fmt.Errorf("error: seek(%d, %s) returned new offset %d", off, Whence(whence).String(), res) + } + } + } + return err +} + +// Create a test file at `path` with random holes +func Create(path string) { + f, err := os.OpenFile(path, os.O_CREATE|os.O_EXCL|os.O_WRONLY, 0600) + if err != nil { + panic(err) + } + defer f.Close() + + rand.Seed(time.Now().UnixNano()) + offsets := make([]int64, 10) + for i := range offsets { + offsets[i] = int64(rand.Int31n(60000)) + } + + buf := []byte("x") + for _, off := range offsets { + _, err = f.WriteAt(buf, off) + if err != nil { + panic(err) + } + } + + // Expand the file to 50000 
bytes so we sometimes have a hole on the end + if offsets[len(offsets)-1] < 50000 { + f.Truncate(50000) + } + + f.Sync() +} diff --git a/contrib/findholes/main.go b/contrib/findholes/main.go index 6f1ae64..17597ae 100644 --- a/contrib/findholes/main.go +++ b/contrib/findholes/main.go @@ -10,13 +10,25 @@ import ( ) func main() { + flags := struct { + verify *bool + create *bool + }{} + flags.verify = flag.Bool("verify", false, "Verify results using full file scan") + flags.create = flag.Bool("create", false, "Create test file with random holes") flag.Parse() if flag.NArg() != 1 { fmt.Printf("Usage: findholes FILE\n") os.Exit(1) } - f, err := os.Open(flag.Arg(0)) + path := flag.Arg(0) + + if *flags.create { + holes.Create(path) + } + + f, err := os.Open(path) if err != nil { // os.Open() gives nicer error messages than syscall.Open() fmt.Println(err) @@ -31,4 +43,15 @@ func main() { } fmt.Println(holes.PrettyPrint(segments)) + + if *flags.verify { + err = holes.Verify(int(f.Fd()), segments) + if err != nil { + fmt.Println(err) + os.Exit(1) + } else { + fmt.Println("verify ok") + } + } + }