From 31c845759e474ce059cab701ca36b3a46e4b7bfe Mon Sep 17 00:00:00 2001 From: Přemysl Eric Janouch
Date: Sun, 10 Dec 2023 04:52:15 +0100 Subject: Use transactions and caching in imports --- main.go | 50 +++++++++++++++++++++++++++++++++++++------------- 1 file changed, 37 insertions(+), 13 deletions(-) (limited to 'main.go') diff --git a/main.go b/main.go index 34c0806..e6eae07 100644 --- a/main.go +++ b/main.go @@ -70,10 +70,12 @@ type directoryManager struct { cache map[string]int64 // Unix-style paths to directory.id } -func (dm *directoryManager) IDForDirectoryPath(path string) (int64, error) { +func (dm *directoryManager) IDForDirectoryPath( + tx *sql.Tx, path string) (int64, error) { // Relative paths could be handled differently, // but right now, they're assumed to start at the root. - list := strings.Split(filepath.ToSlash(filepath.Clean(path)), "/") + path = filepath.ToSlash(filepath.Clean(path)) + list := strings.Split(path, "/") if len(list) > 1 && list[0] == "" { list = list[1:] } @@ -81,9 +83,15 @@ func (dm *directoryManager) IDForDirectoryPath(path string) (int64, error) { return 0, nil } + if dm.cache == nil { + dm.cache = make(map[string]int64) + } else if id, ok := dm.cache[path]; ok { + return id, nil + } + var parent sql.NullInt64 for _, name := range list { - if err := db.QueryRow( + if err := tx.QueryRow( `SELECT id FROM directory WHERE name = ? AND parent IS ?`, name, parent).Scan(&parent); err == nil { continue @@ -91,7 +99,7 @@ func (dm *directoryManager) IDForDirectoryPath(path string) (int64, error) { return 0, err } - if result, err := db.Exec( + if result, err := tx.Exec( `INSERT INTO directory(name, parent) VALUES (?, ?)`, name, parent); err != nil { return 0, err @@ -101,6 +109,7 @@ func (dm *directoryManager) IDForDirectoryPath(path string) (int64, error) { parent = sql.NullInt64{Int64: id, Valid: true} } } + dm.cache[path] = parent.Int64 return parent.Int64, nil } @@ -313,7 +322,11 @@ func isImage(path string) (bool, error) { return bytes.HasPrefix(out, []byte("image/")), nil } -func importFunc(path string, d fs.DirEntry, err error) error { +type importer struct { + dm directoryManager +} + +func (i *importer) Import(path string, d fs.DirEntry, err error) error { if err != nil || d.IsDir() { return err } @@ -363,24 +376,31 @@ func importFunc(path string, d fs.DirEntry, err error) error { return err } - // TODO: This should all run in a transaction. - if _, err = db.Exec(`INSERT INTO image(sha1) VALUES (?) + tx, err := db.Begin() + if err != nil { + return err + } + defer tx.Rollback() + + if _, err = tx.Exec(`INSERT INTO image(sha1) VALUES (?) ON CONFLICT(sha1) DO NOTHING`, hexSHA1); err != nil { return err } - // TODO: Maintain the cache across calls. - dm := directoryManager{} dbDirname, dbBasename := filepath.Split(path) - dbParent, err := dm.IDForDirectoryPath(dbDirname) + dbParent, err := i.dm.IDForDirectoryPath(tx, dbDirname) if err != nil { return err } - _, err = db.Exec(`INSERT INTO entry( + _, err = tx.Exec(`INSERT INTO entry( parent, name, mtime, sha1 ) VALUES (?, ?, ?, ?)`, dbParent, dbBasename, s.ModTime().Unix(), hexSHA1) - return err + if err != nil { + return err + } + + return tx.Commit() } // cmdImport adds files to the "entry" table. @@ -394,8 +414,12 @@ func cmdImport(args []string) error { // TODO: This would better be done in parallel (making hashes). // TODO: Show progress in some manner. Perhaps port my propeller code. + i := importer{} for _, name := range args[1:] { - if err := filepath.WalkDir(name, importFunc); err != nil { + if err := filepath.WalkDir(name, + func(path string, d fs.DirEntry, err error) error { + return i.Import(path, d, err) + }); err != nil { return err } } -- cgit v1.2.3-70-g09d2