From 94399bda33cc4e4bc94c59378027455af12cc56a Mon Sep 17 00:00:00 2001 From: Přemysl Eric Janouch
Date: Wed, 27 Dec 2023 00:41:59 +0100 Subject: Make sync more robust --- main.go | 87 ++++++++++++++++++++++++++++++++++++++++++++++++----------------- 1 file changed, 65 insertions(+), 22 deletions(-) diff --git a/main.go b/main.go index db57896..bbe6f92 100644 --- a/main.go +++ b/main.go @@ -32,6 +32,7 @@ import ( "strconv" "strings" "sync" + "syscall" "time" "github.com/mattn/go-sqlite3" @@ -1148,6 +1149,9 @@ type syncContext struct { stmtOrphan *sql.Stmt stmtDisposeSub *sql.Stmt stmtDisposeAll *sql.Stmt + + // linked tracks which image hashes we've checked so far in the run. + linked map[string]struct{} } func syncPrintf(c *syncContext, format string, v ...any) { @@ -1338,36 +1342,59 @@ func syncDispose(c *syncContext, nodeID int64, keepNode bool) error { return nil } -func syncImage(c *syncContext, info syncFileInfo) error { - if _, err := c.tx.Exec(`INSERT INTO image(sha1, width, height) - VALUES (?, ?, ?) ON CONFLICT(sha1) DO NOTHING`, - info.sha1, info.width, info.height); err != nil { - return err - } - - // Fast path: it may already there, and not be a dead symlink. - path := imagePath(info.sha1) - if _, err := os.Stat(path); err == nil { - return nil - } - +func syncImageResave(c *syncContext, path string, target string) error { dirname, _ := filepath.Split(path) if err := os.MkdirAll(dirname, 0755); err != nil { return err } for { + // Try to remove anything standing in the way. + err := os.Remove(path) + if err != nil && !errors.Is(err, os.ErrNotExist) { + return err + } + // TODO: Make it possible to copy or reflink (ioctl FICLONE). - err := os.Symlink(info.fsPath, path) - if !errors.Is(err, fs.ErrExist) { + err = os.Symlink(target, path) + if err == nil || !errors.Is(err, fs.ErrExist) { return err } + } +} + +func syncImageSave(c *syncContext, sha1 string, target string) error { + if _, ok := c.linked[sha1]; ok { + return nil + } - // Try to remove anything standing in the way, and try again. 
- if err = os.Remove(path); err != nil { + ok, path := false, imagePath(sha1) + if link, err := os.Readlink(path); err == nil { + ok = link == target + } else { + // If it exists, but it is not a symlink, let it be. + // Even though it may not be a regular file. + ok = errors.Is(err, syscall.EINVAL) + } + + if !ok { + if err := syncImageResave(c, path, target); err != nil { return err } } + + c.linked[sha1] = struct{}{} + return nil +} + +func syncImage(c *syncContext, info syncFileInfo) error { + if _, err := c.tx.Exec(`INSERT INTO image(sha1, width, height) + VALUES (?, ?, ?) ON CONFLICT(sha1) DO NOTHING`, + info.sha1, info.width, info.height); err != nil { + return err + } + + return syncImageSave(c, info.sha1, info.fsPath) } func syncPostProcess(c *syncContext, info syncFileInfo) error { @@ -1416,7 +1443,7 @@ func syncPostProcess(c *syncContext, info syncFileInfo) error { return err } - // Even if the hash didn't change, we may fix any broken symlinks. + // Even if the hash didn't change, see comment in syncDirectoryPair(). 
if err := syncImage(c, info); err != nil { return err } @@ -1453,7 +1480,7 @@ func syncDirectoryPair(c *syncContext, dbParent int64, fsPath string, } else if id, err = result.LastInsertId(); err != nil { return err } - return syncDirectory(c, id, filepath.Join(fsPath, fs.fsName)) + return syncDirectory(c, id, fsInfo.fsPath) case db == nil: // 0 → F (or 0 → 0) @@ -1466,7 +1493,7 @@ func syncDirectoryPair(c *syncContext, dbParent int64, fsPath string, case db.dbIsDir() && fs.fsIsDir: // D → D - return syncDirectory(c, db.dbID, filepath.Join(fsPath, fs.fsName)) + return syncDirectory(c, db.dbID, fsInfo.fsPath) case db.dbIsDir(): // D → F (or D → 0) @@ -1481,12 +1508,27 @@ func syncDirectoryPair(c *syncContext, dbParent int64, fsPath string, SET mtime = NULL, sha1 = NULL WHERE id = ?`, db.dbID); err != nil { return err } - return syncDirectory(c, db.dbID, filepath.Join(fsPath, fs.fsName)) + return syncDirectory(c, db.dbID, fsInfo.fsPath) case db.dbMtime != fs.fsMtime: // F → F (or F → 0) // Assuming that any content modifications change the timestamp. return syncEnqueue(c, fsInfo) + + default: + // F → F + // Try to fix symlinks, to handle the following situations: + // 1. Image A occurs in paths 1 and 2, we use a symlink to path 1, + // and path 1 is removed from the filesystem: + // path 2 would not resolve if the mtime didn't change. + // 2. Image A occurs in paths 1 and 2, we use a symlink to path 1, + // and path 1 is changed: + // path 2 would resolve to the wrong file. + // This may relink images with multiple occurrences unnecessarily, + // but it will always fix the roots that are being synced. 
+ if err := syncImageSave(c, db.dbSHA1, fsInfo.fsPath); err != nil { + return err + } } return nil } @@ -1728,7 +1770,8 @@ func cmdSync(fs *flag.FlagSet, args []string) error { return err } - c := syncContext{ctx: ctx, tx: tx, pb: newProgressBar(-1)} + c := syncContext{ctx: ctx, tx: tx, pb: newProgressBar(-1), + linked: make(map[string]struct{})} defer c.pb.Stop() if c.stmtOrphan, err = c.tx.Prepare(disposeCTE + ` -- cgit v1.2.3-70-g09d2