From 068f6f82cfee723e6c6213ea58d05c9e9708e131 Mon Sep 17 00:00:00 2001
From: Přemysl Eric Janouch
Date: Thu, 21 Dec 2023 06:19:03 +0100
Subject: WIP: FS to DB sync

---
 initialize.sql | 17 +++++++++++++++-
 main.go        | 62 ++++++++++++++++++++++++++++++++++++++++++++--------------
 2 files changed, 63 insertions(+), 16 deletions(-)

diff --git a/initialize.sql b/initialize.sql
index c230f18..60b50f0 100644
--- a/initialize.sql
+++ b/initialize.sql
@@ -47,13 +47,28 @@ END;
 
 --
 
--- TODO: Add a trigger to delete from here when a node for the sha1 is added.
 CREATE TABLE IF NOT EXISTS orphan(
 	sha1 TEXT NOT NULL REFERENCES image(sha1)
 	path TEXT NOT NULL,
 	PRIMARY KEY (sha1)
 ) STRICT;
 
+-- Renaming/moving a file can result either in a (ref, unref) or a (unref, ref)
+-- sequence during sync, and I want to get at the same result.
+CREATE TRIGGER IF NOT EXISTS node__sha1__deorphan_insert
+AFTER INSERT ON node
+WHEN NEW.sha1 IS NOT NULL
+BEGIN
+	DELETE FROM orphan WHERE sha1 = NEW.sha1;
+END;
+
+CREATE TRIGGER IF NOT EXISTS node__sha1__deorphan_update
+AFTER UPDATE OF sha1 ON node
+WHEN NEW.sha1 IS NOT NULL
+BEGIN
+	DELETE FROM orphan WHERE sha1 = NEW.sha1;
+END;
+
 --
 
 CREATE TABLE IF NOT EXISTS tag_space(
diff --git a/main.go b/main.go
index 897ae1e..7f15b5c 100644
--- a/main.go
+++ b/main.go
@@ -831,6 +831,11 @@ type syncFile struct {
 	fsIsDir bool
 }
 
+type syncPair struct {
+	db *syncNode
+	fs *syncFile
+}
+
 // syncGetNodes returns direct children of a DB node, ordered by name.
 // SQLite, like Go, compares strings byte-wise by default.
 func syncGetNodes(tx *sql.Tx, dbParent int64) (nodes []syncNode, err error) {
@@ -885,9 +890,38 @@ func syncGetFiles(fsPath string) (files []syncFile, err error) {
 
 // - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
 
-type syncPair struct {
-	db *syncNode
-	fs *syncFile
+func syncProcess(c *syncContext, info *syncFileInfo) error {
+	// Skip videos, which ImageMagick can process, but we don't want it to,
+	// so that they're not converted 1:1 to WebP.
+	pathIsImage, err := isImage(info.fsPath)
+	if err != nil {
+		return err
+	}
+	if !pathIsImage {
+		return nil
+	}
+
+	info.width, info.height, err = pingImage(info.fsPath)
+	if err != nil {
+		return err
+	}
+
+	f, err := os.Open(info.fsPath)
+	if err != nil {
+		return err
+	}
+	defer f.Close()
+
+	// We could make this at least somewhat interruptible by c.ctx,
+	// though it would still work poorly.
+	hash := sha1.New()
+	_, err = io.CopyBuffer(hash, f, make([]byte, 65536))
+	if err != nil {
+		return err
+	}
+
+	info.sha1 = hex.EncodeToString(hash.Sum(nil))
+	return nil
 }
 
 // syncEnqueue runs file scanning, which can be CPU and I/O expensive,
@@ -899,8 +933,8 @@ func syncEnqueue(c *syncContext, info syncFileInfo) error {
 
 	go func(info syncFileInfo) {
 		defer taskSemaphore.release()
-
-		// TODO: Process the file and enqueue a result.
+		info.err = syncProcess(c, &info)
+		c.info <- info
 	}(info)
 	return nil
 }
@@ -921,22 +955,20 @@ func syncDequeue(c *syncContext) error {
 	}
 }
 
-// TODO: Implement.
-//
-// - When collecting node subtrees, we need to delete bottom-up
-// because of foreign key constraints,
-// so maybe in reverse order of recursive CTE results.
-//
-// - Sadly, this can't be done with a DB trigger. (What and why?)
-//
-// - One of the inputs needs to be the FS path, for the orphan table.
-//
 // syncDispose creates orphan records for the entire subtree given by nodeID
 // as appropriate, then deletes all nodes within the subtree. The subtree root
 // node is not deleted if "keepNode" is true.
 //
 // Orphans keep their thumbnail files, as evidence.
 func syncDispose(c *syncContext, nodeID int64, keepNode bool) error {
+	// TODO: Implement.
+	// - When collecting node subtrees, we need to delete bottom-up
+	// because of foreign key constraints,
+	// so maybe in reverse order of recursive CTE results.
+	// - Sadly, this can't be done with a DB trigger. (What and why?)
+	// - One of the inputs needs to be the FS path, for the orphan table.
+	// - I may not have the FS path (symlink).
+	// - I can just recursively select for the path based on nodeID.
 	return nil
 }
 
-- 
cgit v1.2.3-70-g09d2