package main import ( "bufio" "bytes" "context" "crypto/sha1" "database/sql" "encoding/hex" "encoding/json" "errors" "fmt" "html/template" "io" "io/fs" "log" "math/bits" "net" "net/http" "os" "os/exec" "os/signal" "path/filepath" "regexp" "runtime" "slices" "sort" "strconv" "strings" "sync" "time" "github.com/mattn/go-sqlite3" ) var ( db *sql.DB // sqlite database galleryDirectory string // gallery directory // taskSemaphore limits parallel computations. taskSemaphore semaphore ) const ( nameOfDB = "gallery.db" nameOfImageRoot = "images" nameOfThumbRoot = "thumbs" ) func hammingDistance(a, b int64) int { return bits.OnesCount64(uint64(a) ^ uint64(b)) } func init() { sql.Register("sqlite3_custom", &sqlite3.SQLiteDriver{ ConnectHook: func(conn *sqlite3.SQLiteConn) error { return conn.RegisterFunc("hamming", hammingDistance, true) }, }) } func openDB(directory string) error { var err error db, err = sql.Open("sqlite3_custom", "file:"+filepath.Join(directory, nameOfDB+"?_foreign_keys=1&_busy_timeout=1000")) galleryDirectory = directory return err } func imagePath(sha1 string) string { return filepath.Join(galleryDirectory, nameOfImageRoot, sha1[:2], sha1) } func thumbPath(sha1 string) string { return filepath.Join(galleryDirectory, nameOfThumbRoot, sha1[:2], sha1+".webp") } func dbCollectStrings(query string) ([]string, error) { rows, err := db.Query(query) if err != nil { return nil, err } defer rows.Close() var result []string for rows.Next() { var s string if err := rows.Scan(&s); err != nil { return nil, err } result = append(result, s) } if err := rows.Err(); err != nil { return nil, err } return result, nil } // --- Semaphore --------------------------------------------------------------- type semaphore chan struct{} func newSemaphore(size int) semaphore { return make(chan struct{}, size) } func (s semaphore) release() { <-s } func (s semaphore) acquire(ctx context.Context) error { select { case <-ctx.Done(): return ctx.Err() case s <- struct{}{}: } // Give priority to context cancellation. select { case <-ctx.Done(): s.release() return ctx.Err() default: } return nil } // --- Progress bar ------------------------------------------------------------ type progressBar struct { mutex sync.Mutex current int target int } func newProgressBar(target int) *progressBar { pb := &progressBar{current: 0, target: target} pb.Update() return pb } func (pb *progressBar) Stop() { // The minimum thing that works: just print a newline. os.Stdout.WriteString("\n") } func (pb *progressBar) Update() { if pb.target < 0 { fmt.Printf("\r%d/?", pb.current) return } var fraction int if pb.target != 0 { fraction = int(float32(pb.current) / float32(pb.target) * 100) } target := fmt.Sprintf("%d", pb.target) fmt.Printf("\r%*d/%s (%2d%%)", len(target), pb.current, target, fraction) } func (pb *progressBar) Step() { pb.mutex.Lock() defer pb.mutex.Unlock() pb.current++ pb.Update() } // --- Initialization ---------------------------------------------------------- // cmdInit initializes a "gallery directory" that contains gallery.sqlite, // images, thumbs. func cmdInit(args []string) error { if len(args) != 1 { return errors.New("usage: GD") } if err := openDB(args[0]); err != nil { return err } if _, err := db.Exec(initializeSQL); err != nil { return err } // XXX: There's technically no reason to keep images as symlinks, // we might just keep absolute paths in the database as well. if err := os.MkdirAll( filepath.Join(galleryDirectory, nameOfImageRoot), 0755); err != nil { return err } if err := os.MkdirAll( filepath.Join(galleryDirectory, nameOfThumbRoot), 0755); err != nil { return err } return nil } // --- Web --------------------------------------------------------------------- var hashRE = regexp.MustCompile(`^/.*?/([0-9a-f]{40})$`) var staticHandler http.Handler var page = template.Must(template.New("/").Parse(` Gallery `)) func handleRequest(w http.ResponseWriter, r *http.Request) { if r.URL.Path != "/" { staticHandler.ServeHTTP(w, r) return } if err := page.Execute(w, nil); err != nil { log.Println(err) } } func handleImages(w http.ResponseWriter, r *http.Request) { if m := hashRE.FindStringSubmatch(r.URL.Path); m == nil { http.NotFound(w, r) } else { http.ServeFile(w, r, imagePath(m[1])) } } func handleThumbs(w http.ResponseWriter, r *http.Request) { if m := hashRE.FindStringSubmatch(r.URL.Path); m == nil { http.NotFound(w, r) } else { http.ServeFile(w, r, thumbPath(m[1])) } } // - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - func getSubdirectories(tx *sql.Tx, parent int64) (names []string, err error) { // TODO: This is like dbCollectStrings(), just needs an argument. rows, err := tx.Query(`SELECT name FROM node WHERE IFNULL(parent, 0) = ? AND sha1 IS NULL`, parent) if err != nil { return nil, err } defer rows.Close() names = []string{} for rows.Next() { var name string if err := rows.Scan(&name); err != nil { return nil, err } names = append(names, name) } return names, rows.Err() } type webEntry struct { SHA1 string `json:"sha1"` Name string `json:"name"` Modified int64 `json:"modified"` ThumbW int64 `json:"thumbW"` ThumbH int64 `json:"thumbH"` } func getSubentries(tx *sql.Tx, parent int64) (entries []webEntry, err error) { rows, err := tx.Query(` SELECT i.sha1, n.name, n.mtime, IFNULL(i.thumbw, 0), IFNULL(i.thumbh, 0) FROM node AS n JOIN image AS i ON n.sha1 = i.sha1 WHERE n.parent = ?`, parent) if err != nil { return nil, err } defer rows.Close() entries = []webEntry{} for rows.Next() { var e webEntry if err := rows.Scan( &e.SHA1, &e.Name, &e.Modified, &e.ThumbW, &e.ThumbH); err != nil { return nil, err } entries = append(entries, e) } return entries, rows.Err() } func handleAPIBrowse(w http.ResponseWriter, r *http.Request) { var params struct { Path string } if err := json.NewDecoder(r.Body).Decode(¶ms); err != nil { http.Error(w, err.Error(), http.StatusBadRequest) return } var result struct { Subdirectories []string `json:"subdirectories"` Entries []webEntry `json:"entries"` } tx, err := db.Begin() if err != nil { http.Error(w, err.Error(), http.StatusInternalServerError) return } defer tx.Rollback() parent, err := idForPath(tx, decodeWebPath(params.Path), false) if err != nil { http.Error(w, err.Error(), http.StatusNotFound) return } result.Subdirectories, err = getSubdirectories(tx, parent) if err != nil { http.Error(w, err.Error(), http.StatusInternalServerError) return } result.Entries, err = getSubentries(tx, parent) if err != nil { http.Error(w, err.Error(), http.StatusInternalServerError) return } if err := json.NewEncoder(w).Encode(result); err != nil { log.Println(err) } } // - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - func getImageDimensions(sha1 string) (w int64, h int64, err error) { err = db.QueryRow(`SELECT width, height FROM image WHERE sha1 = ?`, sha1).Scan(&w, &h) return } func getImagePaths(sha1 string) (paths []string, err error) { rows, err := db.Query(`WITH RECURSIVE paths(parent, path) AS ( SELECT parent, name AS path FROM node WHERE sha1 = ? UNION ALL SELECT n.parent, n.name || '/' || p.path FROM node AS n JOIN paths AS p ON n.id = p.parent ) SELECT path FROM paths WHERE parent IS NULL`, sha1) if err != nil { return nil, err } defer rows.Close() paths = []string{} for rows.Next() { var path string if err := rows.Scan(&path); err != nil { return nil, err } paths = append(paths, path) } return paths, rows.Err() } func getImageTags(sha1 string) (map[string]map[string]float32, error) { rows, err := db.Query(` SELECT ts.name, t.name, ta.weight FROM tag_assignment AS ta JOIN tag AS t ON t.id = ta.tag JOIN tag_space AS ts ON ts.id = t.space WHERE ta.sha1 = ?`, sha1) if err != nil { return nil, err } defer rows.Close() result := make(map[string]map[string]float32) for rows.Next() { var ( space, tag string weight float32 ) if err := rows.Scan(&space, &tag, &weight); err != nil { return nil, err } tags := result[space] if tags == nil { tags = make(map[string]float32) result[space] = tags } tags[tag] = weight } return result, rows.Err() } func handleAPIInfo(w http.ResponseWriter, r *http.Request) { var params struct { SHA1 string } if err := json.NewDecoder(r.Body).Decode(¶ms); err != nil { http.Error(w, err.Error(), http.StatusBadRequest) return } var result struct { Width int64 `json:"width"` Height int64 `json:"height"` Paths []string `json:"paths"` Tags map[string]map[string]float32 `json:"tags"` } var err error result.Width, result.Height, err = getImageDimensions(params.SHA1) if err != nil { http.Error(w, err.Error(), http.StatusInternalServerError) return } result.Paths, err = getImagePaths(params.SHA1) if err != nil { http.Error(w, err.Error(), http.StatusInternalServerError) return } result.Tags, err = getImageTags(params.SHA1) if err != nil { http.Error(w, err.Error(), http.StatusInternalServerError) return } if err := json.NewEncoder(w).Encode(result); err != nil { log.Println(err) } } // - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - type webSimilarImage struct { SHA1 string `json:"sha1"` PixelsRatio float32 `json:"pixelsRatio"` ThumbW int64 `json:"thumbW"` ThumbH int64 `json:"thumbH"` Paths []string `json:"paths"` } func getSimilar(sha1 string, pixels int64, distance int) ( result []webSimilarImage, err error) { // For distance ∈ {0, 1}, this query is quite inefficient. // In exchange, it's generic. // // If there's a dhash, there should also be thumbnail dimensions, // so not bothering with IFNULL on them. rows, err := db.Query(`SELECT sha1, width * height, thumbw, thumbh FROM image WHERE hamming(dhash, (SELECT dhash FROM image WHERE sha1 = ?)) = ? AND sha1 <> ?`, sha1, distance, sha1) if err != nil { return nil, err } defer rows.Close() result = []webSimilarImage{} for rows.Next() { var ( match webSimilarImage matchPixels int64 ) if err = rows.Scan(&match.SHA1, &matchPixels, &match.ThumbW, &match.ThumbH); err != nil { return nil, err } if match.Paths, err = getImagePaths(match.SHA1); err != nil { return nil, err } match.PixelsRatio = float32(matchPixels) / float32(pixels) result = append(result, match) } return result, rows.Err() } func handleAPISimilar(w http.ResponseWriter, r *http.Request) { var params struct { SHA1 string } if err := json.NewDecoder(r.Body).Decode(¶ms); err != nil { http.Error(w, err.Error(), http.StatusBadRequest) return } var result struct { Info webSimilarImage `json:"info"` Groups map[string][]webSimilarImage `json:"groups"` } result.Info = webSimilarImage{SHA1: params.SHA1, PixelsRatio: 1} if paths, err := getImagePaths(params.SHA1); err != nil { http.Error(w, err.Error(), http.StatusInternalServerError) return } else { result.Info.Paths = paths } var width, height int64 err := db.QueryRow(`SELECT width, height, thumbw, thumbh FROM image WHERE sha1 = ?`, params.SHA1).Scan(&width, &height, &result.Info.ThumbW, &result.Info.ThumbH) if err != nil { http.Error(w, err.Error(), http.StatusInternalServerError) return } result.Groups = make(map[string][]webSimilarImage) for distance := 0; distance <= 1; distance++ { result.Groups[fmt.Sprintf("Perceptual distance %d", distance)], err = getSimilar(params.SHA1, width*height, distance) if err != nil { http.Error(w, err.Error(), http.StatusInternalServerError) return } } if err := json.NewEncoder(w).Encode(result); err != nil { log.Println(err) } } // - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - // cmdRun runs a web UI against GD on ADDRESS. func cmdRun(args []string) error { if len(args) != 2 { return errors.New("usage: GD ADDRESS") } if err := openDB(args[0]); err != nil { return err } address := args[1] // This separation is not strictly necessary, // but having an elementary level of security doesn't hurt either. staticHandler = http.FileServer(http.Dir("public")) http.HandleFunc("/", handleRequest) http.HandleFunc("/image/", handleImages) http.HandleFunc("/thumb/", handleThumbs) http.HandleFunc("/api/browse", handleAPIBrowse) http.HandleFunc("/api/info", handleAPIInfo) http.HandleFunc("/api/similar", handleAPISimilar) host, port, err := net.SplitHostPort(address) if err != nil { log.Println(err) } else if host == "" { log.Println("http://" + net.JoinHostPort("localhost", port)) } else { log.Println("http://" + address) } s := &http.Server{ Addr: address, ReadTimeout: 60 * time.Second, WriteTimeout: 60 * time.Second, MaxHeaderBytes: 32 << 10, } return s.ListenAndServe() } // --- Import ------------------------------------------------------------------ func idForPath(tx *sql.Tx, path []string, create bool) (int64, error) { var parent sql.NullInt64 for _, name := range path { if err := tx.QueryRow(`SELECT id FROM node WHERE parent IS ? AND name = ? AND sha1 IS NULL`, parent, name).Scan(&parent); err == nil { continue } else if !errors.Is(err, sql.ErrNoRows) { return 0, err } else if !create { return 0, err } // This fails when trying to override a leaf node. // That needs special handling. if result, err := tx.Exec( `INSERT INTO node(parent, name) VALUES (?, ?)`, parent, name); err != nil { return 0, err } else if id, err := result.LastInsertId(); err != nil { return 0, err } else { parent = sql.NullInt64{Int64: id, Valid: true} } } return parent.Int64, nil } func decodeWebPath(path string) []string { // Relative paths could be handled differently, // but right now, they're assumed to start at the root. result := []string{} for _, crumb := range strings.Split(path, "/") { if crumb != "" { result = append(result, crumb) } } return result } // - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - type directoryManager struct { cache map[string]int64 // Unix-style paths to directory.id } func (dm *directoryManager) IDForDirectoryPath( tx *sql.Tx, path string) (int64, error) { path = filepath.ToSlash(filepath.Clean(path)) list := decodeWebPath(path) if len(list) == 0 { return 0, nil } if dm.cache == nil { dm.cache = make(map[string]int64) } else if id, ok := dm.cache[path]; ok { return id, nil } id, err := idForPath(tx, list, true) if err != nil { return 0, err } dm.cache[path] = id return id, nil } func isImage(path string) (bool, error) { out, err := exec.Command("xdg-mime", "query", "filetype", path).Output() if err != nil { return false, err } return bytes.HasPrefix(out, []byte("image/")), nil } func pingImage(path string) (int, int, error) { out, err := exec.Command("identify", "-limit", "thread", "1", "-ping", "-format", "%w %h", path+"[0]").Output() if err != nil { return 0, 0, err } var w, h int _, err = fmt.Fscanf(bytes.NewReader(out), "%d %d", &w, &h) return w, h, err } type importer struct { dm directoryManager dmMutex sync.Mutex } func (i *importer) Import(path string) error { // The input may be a relative path, and we want to remember it as such, // but symlinks for the images must be absolute. absPath, err := filepath.Abs(path) if err != nil { return err } // Skip videos, which ImageMagick can process, but we don't want it to, // so that they're not converted 1:1 to WebP. pathIsImage, err := isImage(path) if err != nil { return err } if !pathIsImage { return nil } width, height, err := pingImage(path) if err != nil { return err } f, err := os.Open(path) if err != nil { return err } defer f.Close() s, err := f.Stat() if err != nil { return err } hash := sha1.New() _, err = io.CopyBuffer(hash, f, make([]byte, 65536)) if err != nil { return err } hexSHA1 := hex.EncodeToString(hash.Sum(nil)) pathImage := imagePath(hexSHA1) imageDirname, _ := filepath.Split(pathImage) if err := os.MkdirAll(imageDirname, 0755); err != nil { return err } if err := os.Symlink(absPath, pathImage); err != nil && !errors.Is(err, fs.ErrExist) { return err } // The directoryManager isn't thread-safe. // This lock also simulates a timeout-less BEGIN EXCLUSIVE. i.dmMutex.Lock() defer i.dmMutex.Unlock() tx, err := db.Begin() if err != nil { return err } defer tx.Rollback() if _, err = tx.Exec(`INSERT INTO image(sha1, width, height) VALUES (?, ?, ?) ON CONFLICT(sha1) DO NOTHING`, hexSHA1, width, height); err != nil { return err } // XXX: The directoryManager's cache is questionable here, // if only because it keeps entries even when transactions fail. dbDirname, dbBasename := filepath.Split(path) dbParent, err := i.dm.IDForDirectoryPath(tx, dbDirname) if err != nil { return err } // FIXME: This disallows any entries directly in the root. _, err = tx.Exec(`INSERT INTO node(parent, name, mtime, sha1) VALUES (?, ?, ?, ?) ON CONFLICT DO UPDATE SET mtime = excluded.mtime, sha1 = excluded.sha1`, dbParent, dbBasename, s.ModTime().Unix(), hexSHA1) if err != nil { return err } return tx.Commit() } // cmdImport adds files to the "node" table. // TODO: Consider making this copy rather than symlink images. func cmdImport(args []string) error { if len(args) < 1 { return errors.New("usage: GD ROOT...") } if err := openDB(args[0]); err != nil { return err } // Make the first step collecting all the paths, // in order to show more useful progress information. paths := []string{} cb := func(path string, d fs.DirEntry, err error) error { if err != nil || d.IsDir() { return err } paths = append(paths, path) return nil } for _, name := range args[1:] { if err := filepath.WalkDir(name, cb); err != nil { return err } } pb := newProgressBar(len(paths)) defer pb.Stop() i := importer{} ctx, cancel := context.WithCancelCause(context.Background()) wg := sync.WaitGroup{} for _, path := range paths { if taskSemaphore.acquire(ctx) != nil { break } wg.Add(1) go func(path string) { defer taskSemaphore.release() defer wg.Done() if err := i.Import(path); err != nil { cancel(err) } else { pb.Step() } }(path) } wg.Wait() if ctx.Err() != nil { return context.Cause(ctx) } return nil } // - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - type syncFileInfo struct { dbID int64 // DB node ID, or zero if there was none dbParent int64 // where the file was to be stored dbName string // the name under which it was to be stored fsPath string // symlink target fsMtime int64 // last modified Unix timestamp, used a bit like an ID err error // any processing error sha1 string // raw content hash, empty to skip file width int // image width in pixels height int // image height in pixels } type syncContext struct { ctx context.Context tx *sql.Tx info chan syncFileInfo pb *progressBar stmtOrphan *sql.Stmt stmtDisposeSub *sql.Stmt stmtDisposeAll *sql.Stmt } func syncPrintf(c *syncContext, format string, v ...any) { c.pb.Stop() log.Printf(format+"\n", v...) c.pb.Update() } // - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - type syncNode struct { dbID int64 dbName string dbMtime int64 dbSHA1 string } func (n *syncNode) dbIsDir() bool { return n.dbSHA1 == "" } type syncFile struct { fsName string fsMtime int64 fsIsDir bool } type syncPair struct { db *syncNode fs *syncFile } // syncGetNodes returns direct children of a DB node, ordered by name. // SQLite, like Go, compares strings byte-wise by default. func syncGetNodes(tx *sql.Tx, dbParent int64) (nodes []syncNode, err error) { // This works even for the root, which doesn't exist as a DB node. rows, err := tx.Query(`SELECT id, name, IFNULL(mtime, 0), IFNULL(sha1, '') FROM node WHERE IFNULL(parent, 0) = ? ORDER BY name`, dbParent) if err != nil { return } defer rows.Close() for rows.Next() { var node syncNode if err = rows.Scan(&node.dbID, &node.dbName, &node.dbMtime, &node.dbSHA1); err != nil { return } nodes = append(nodes, node) } return nodes, rows.Err() } // syncGetFiles returns direct children of a FS directory, ordered by name. func syncGetFiles(fsPath string) (files []syncFile, err error) { dir, err := os.Open(fsPath) if err != nil { return } defer dir.Close() entries, err := dir.ReadDir(0) if err != nil { return } for _, entry := range entries { info, err := entry.Info() if err != nil { return files, err } files = append(files, syncFile{ fsName: entry.Name(), fsMtime: info.ModTime().Unix(), fsIsDir: entry.IsDir(), }) } sort.Slice(files, func(a, b int) bool { return files[a].fsName < files[b].fsName }) return } // - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - func syncProcess(c *syncContext, info *syncFileInfo) error { // Skip videos, which ImageMagick can process, but we don't want it to, // so that they're not converted 1:1 to WebP. pathIsImage, err := isImage(info.fsPath) if err != nil { return err } if !pathIsImage { return nil } info.width, info.height, err = pingImage(info.fsPath) if err != nil { return err } f, err := os.Open(info.fsPath) if err != nil { return err } defer f.Close() // We could make this at least somewhat interruptible by c.ctx, // though it would still work poorly. hash := sha1.New() _, err = io.CopyBuffer(hash, f, make([]byte, 65536)) if err != nil { return err } info.sha1 = hex.EncodeToString(hash.Sum(nil)) return nil } // syncEnqueue runs file scanning, which can be CPU and I/O expensive, // in parallel. The goroutine only touches the filesystem, read-only. func syncEnqueue(c *syncContext, info syncFileInfo) error { if err := taskSemaphore.acquire(c.ctx); err != nil { return err } go func(info syncFileInfo) { defer taskSemaphore.release() info.err = syncProcess(c, &info) c.info <- info }(info) return nil } // syncDequeue flushes the result queue of finished asynchronous tasks. func syncDequeue(c *syncContext) error { for { select { case <-c.ctx.Done(): return c.ctx.Err() case info := <-c.info: if err := syncPostProcess(c, info); err != nil { return err } default: return nil } } } // syncDispose creates orphan records for the entire subtree given by nodeID // as appropriate, then deletes all nodes within the subtree. The subtree root // node is not deleted if "keepNode" is true. // // Orphans keep their thumbnail files, as evidence. func syncDispose(c *syncContext, nodeID int64, keepNode bool) error { if _, err := c.stmtOrphan.Exec(nodeID); err != nil { return err } if keepNode { if _, err := c.stmtDisposeSub.Exec(nodeID); err != nil { return err } } else { if _, err := c.stmtDisposeAll.Exec(nodeID); err != nil { return err } } return nil } func syncImage(c *syncContext, info syncFileInfo) error { if _, err := c.tx.Exec(`INSERT INTO image(sha1, width, height) VALUES (?, ?, ?) ON CONFLICT(sha1) DO NOTHING`, info.sha1, info.width, info.height); err != nil { return err } // Fast path: it may already there, and not be a dead symlink. path := imagePath(info.sha1) if _, err := os.Stat(path); err == nil { return nil } dirname, _ := filepath.Split(path) if err := os.MkdirAll(dirname, 0755); err != nil { return err } for { err := os.Symlink(info.fsPath, path) if !errors.Is(err, fs.ErrExist) { return err } // Try to remove anything standing in the way, and try again. if err = os.Remove(path); err != nil { return err } } } func syncPostProcess(c *syncContext, info syncFileInfo) error { defer c.pb.Step() // TODO: When replacing an image node (whether it has or doesn't have // other links to keep it alive), we could offer copying all tags, // though this needs another table to track it. // (If it's equivalent enough, the dhash will stay the same, // so user can resolve this through the duplicates feature.) switch { case info.err != nil: // * → error return info.err case info.sha1 == "": // 0 → 0 if info.dbID == 0 { return nil } // D → 0, F → 0 return syncDispose(c, info.dbID, false /*keepNode*/) case info.dbID == 0: // 0 → F if err := syncImage(c, info); err != nil { return err } if _, err := c.tx.Exec(`INSERT INTO node(parent, name, mtime, sha1) VALUES (?, ?, ?, ?)`, info.dbParent, info.dbName, info.fsMtime, info.sha1); err != nil { return err } return nil default: // D → F, F → F (this statement is a no-op with the latter) if err := syncDispose(c, info.dbID, true /*keepNode*/); err != nil { return err } // Even if the hash didn't change, we may fix any broken symlinks. if err := syncImage(c, info); err != nil { return err } if _, err := c.tx.Exec(`UPDATE node SET mtime = ?, sha1 = ? WHERE id = ?`, info.fsMtime, info.sha1, info.dbID); err != nil { return err } return nil } } func syncDirectoryPair(c *syncContext, dbParent int64, fsPath string, pair syncPair) error { db, fs, fsInfo := pair.db, pair.fs, syncFileInfo{dbParent: dbParent} if db != nil { fsInfo.dbID = db.dbID } if fs != nil { fsInfo.dbName = fs.fsName fsInfo.fsPath = filepath.Join(fsPath, fs.fsName) fsInfo.fsMtime = fs.fsMtime } switch { case db == nil && fs == nil: // 0 → 0, unreachable. case db == nil && fs.fsIsDir: // 0 → D var id int64 if result, err := c.tx.Exec(`INSERT INTO node(parent, name) VALUES (?, ?)`, dbParent, fs.fsName); err != nil { return err } else if id, err = result.LastInsertId(); err != nil { return err } return syncDirectory(c, id, filepath.Join(fsPath, fs.fsName)) case db == nil: // 0 → F (or 0 → 0) return syncEnqueue(c, fsInfo) case fs == nil: // D → 0, F → 0 return syncDispose(c, db.dbID, false /*keepNode*/) case db.dbIsDir() && fs.fsIsDir: // D → D return syncDirectory(c, db.dbID, filepath.Join(fsPath, fs.fsName)) case db.dbIsDir(): // D → F (or D → 0) return syncEnqueue(c, fsInfo) case fs.fsIsDir: // F → D if err := syncDispose(c, db.dbID, true /*keepNode*/); err != nil { return err } if _, err := c.tx.Exec(`UPDATE node SET mtime = NULL, sha1 = NULL WHERE id = ?`, db.dbID); err != nil { return err } return syncDirectory(c, db.dbID, filepath.Join(fsPath, fs.fsName)) case db.dbMtime != fs.fsMtime: // F → F (or F → 0) // Assuming that any content modifications change the timestamp. return syncEnqueue(c, fsInfo) } return nil } func syncDirectory(c *syncContext, dbParent int64, fsPath string) error { db, err := syncGetNodes(c.tx, dbParent) if err != nil { return err } fs, err := syncGetFiles(fsPath) if err != nil { return err } // This would not be fatal, but it has annoying consequences. if _, ok := slices.BinarySearchFunc(fs, syncFile{fsName: nameOfDB}, func(a, b syncFile) int { return strings.Compare(a.fsName, b.fsName) }); ok { syncPrintf(c, "%s may be a gallery directory, treating as empty", fsPath) fs = nil } // Convert differences to a more convenient form for processing. iDB, iFS, pairs := 0, 0, []syncPair{} for iDB < len(db) && iFS < len(fs) { if db[iDB].dbName == fs[iFS].fsName { pairs = append(pairs, syncPair{&db[iDB], &fs[iFS]}) } else if db[iDB].dbName < fs[iFS].fsName { pairs = append(pairs, syncPair{&db[iDB], nil}) iDB++ } else { pairs = append(pairs, syncPair{nil, &fs[iFS]}) iFS++ } } for i := range db[iDB:] { pairs = append(pairs, syncPair{&db[iDB+i], nil}) } for i := range fs[iFS:] { pairs = append(pairs, syncPair{nil, &fs[iFS+i]}) } for _, pair := range pairs { if err := syncDequeue(c); err != nil { return err } if err := syncDirectoryPair(c, dbParent, fsPath, pair); err != nil { return err } } return nil } func syncRoot(c *syncContext, fsPath string) error { // Figure out a database root (not trying to convert F → D on conflict, // also because we don't know yet if the argument is a directory). // // Synchronizing F → D or * → F are special cases not worth implementing. crumbs := decodeWebPath(filepath.ToSlash(fsPath)) dbParent, err := idForPath(c.tx, crumbs, true) if err != nil { return err } if err := syncDirectory(c, dbParent, fsPath); err != nil { return err } // Wait for all tasks to finish, and process the results of their work. for i := 0; i < cap(taskSemaphore); i++ { if err := taskSemaphore.acquire(c.ctx); err != nil { return err } } if err := syncDequeue(c); err != nil { return err } // This is not our semaphore, so prepare it for the next user. for i := 0; i < cap(taskSemaphore); i++ { taskSemaphore.release() } return nil } const disposeCTE = `WITH RECURSIVE root(id, sha1, parent, path) AS ( SELECT id, sha1, parent, name FROM node WHERE id = ? UNION ALL SELECT r.id, r.sha1, n.parent, n.name || '/' || r.path FROM node AS n JOIN root AS r ON n.id = r.parent ), children(id, sha1, path, level) AS ( SELECT id, sha1, path, 1 FROM root WHERE parent IS NULL UNION ALL SELECT n.id, n.sha1, c.path || '/' || n.name, c.level + 1 FROM node AS n JOIN children AS c ON n.parent = c.id ), removed(sha1, count, path) AS ( SELECT sha1, COUNT(*) AS count, MIN(path) AS path FROM children GROUP BY sha1 ), orphaned(sha1, path, count, total) AS ( SELECT r.sha1, r.path, r.count, COUNT(*) AS total FROM removed AS r JOIN node ON node.sha1 = r.sha1 GROUP BY node.sha1 HAVING count = total )` func syncRun(ctx context.Context, tx *sql.Tx, roots []string) error { c := syncContext{ctx: ctx, tx: tx, pb: newProgressBar(-1)} defer c.pb.Stop() var err error if c.stmtOrphan, err = c.tx.Prepare(disposeCTE + ` INSERT OR IGNORE INTO orphan(sha1, path) SELECT sha1, path FROM orphaned`); err != nil { return err } if c.stmtDisposeSub, err = c.tx.Prepare(disposeCTE + ` DELETE FROM node WHERE id IN (SELECT DISTINCT id FROM children WHERE level <> 1)`); err != nil { return err } if c.stmtDisposeAll, err = c.tx.Prepare(disposeCTE + ` DELETE FROM node WHERE id IN (SELECT DISTINCT id FROM children)`); err != nil { return err } // Info tasks take a position in the task semaphore channel. // then fill the info channel. // // Immediately after syncDequeue(), the info channel is empty, // but the semaphore might be full. // // By having at least one position in the info channel, // we allow at least one info task to run to semaphore release, // so that syncEnqueue() doesn't deadlock. // // By making it the same size as the semaphore, // the end of this function doesn't need to dequeue while waiting. // It also prevents goroutine leaks despite leaving them running-- // once they finish their job, they're gone, // and eventually the info channel would get garbage collected. // // The additional slot is there to handle the one result // that may be placed while syncEnqueue() waits for the semaphore, // i.e., it is for the result of the task that syncEnqueue() spawns. c.info = make(chan syncFileInfo, cap(taskSemaphore)+1) for _, path := range roots { if err := syncRoot(&c, path); err != nil { return err } } // TODO: Garbage collect empty directories, recursively. // Ideally, stop at the affected DB roots (assuming we go bottom-up). // // We need to do this at the end, due to our recursive handling, // as well as because of asynchronous file filtering. return nil } // cmdSync ensures the given (sub)roots are accurately reflected // in the database. func cmdSync(args []string) error { if len(args) < 2 { return errors.New("usage: GD ROOT...") } if err := openDB(args[0]); err != nil { return err } // TODO: See if the SQLite can cancel anything in a useful manner. // If using this, beware that a cancel prevents commiting transactions. ctx := context.Background() // In case of a failure during processing, the only retained side effects // on the filesystem tree are: // - Fixing dead symlinks to images. // - Creating symlinks to images that aren't necessary. tx, err := db.BeginTx(ctx, nil) if err != nil { return err } defer tx.Rollback() // Mild hack: upgrade the transaction to a write one straight away, // in order to rule out deadlocks (preventable failure). if _, err := tx.Exec(`END TRANSACTION; BEGIN IMMEDIATE TRANSACTION`); err != nil { return err } // XXX: By not using the context for the transaction, // interrupts can get ignored around the Commit. ctxSignal, stop := signal.NotifyContext(ctx, os.Interrupt) defer stop() // Normalize arguments. // At least for now, turn all roots into absolute paths. roots := args[1:] for i := range roots { roots[i], err = filepath.Abs(filepath.Clean(roots[i])) if err != nil { return err } } // Filter out duplicates. sort.Strings(roots) roots = slices.CompactFunc(roots, func(a, b string) bool { if a != b && !strings.HasPrefix(b, a+"/") { return false } log.Printf("asking to sync path twice: %s\n", b) return true }) if err := syncRun(ctxSignal, tx, roots); err != nil { return err } return tx.Commit() } // --- Tagging ----------------------------------------------------------------- // cmdTag mass imports tags from data passed on stdin as a TSV // of SHA1 TAG WEIGHT entries. func cmdTag(args []string) error { if len(args) < 2 || len(args) > 3 { return errors.New("usage: GD SPACE [DESCRIPTION]") } if err := openDB(args[0]); err != nil { return err } space := args[1] var description sql.NullString if len(args) >= 3 { description = sql.NullString{String: args[2], Valid: true} } // Note that starting as a write transaction prevents deadlocks. // Imports are rare, and just bulk load data, so this scope is fine. tx, err := db.Begin() if err != nil { return err } defer tx.Rollback() if _, err := tx.Exec(`INSERT OR IGNORE INTO tag_space(name, description) VALUES (?, ?)`, space, description); err != nil { return err } var spaceID int64 if err := tx.QueryRow(`SELECT id FROM tag_space WHERE name = ?`, space).Scan(&spaceID); err != nil { return err } // XXX: It might make sense to pre-erase all tag assignments within // the given space for that image, the first time we see it: // // DELETE FROM tag_assignment // WHERE sha1 = ? AND tag IN (SELECT id FROM tag WHERE space = ?) // // or even just clear the tag space completely: // // DELETE FROM tag_assignment // WHERE tag IN (SELECT id FROM tag WHERE space = ?); // DELETE FROM tag WHERE space = ?; stmt, err := tx.Prepare(`INSERT INTO tag_assignment(sha1, tag, weight) VALUES (?, (SELECT id FROM tag WHERE space = ? AND name = ?), ?) ON CONFLICT DO UPDATE SET weight = ?`) if err != nil { return err } scanner := bufio.NewScanner(os.Stdin) for scanner.Scan() { fields := strings.Split(scanner.Text(), "\t") if len(fields) != 3 { return errors.New("invalid input format") } sha1, tag := fields[0], fields[1] weight, err := strconv.ParseFloat(fields[2], 64) if err != nil { return err } if _, err := tx.Exec( `INSERT OR IGNORE INTO tag(space, name) VALUES (?, ?);`, spaceID, tag); err != nil { return nil } if _, err := stmt.Exec(sha1, spaceID, tag, weight, weight); err != nil { return fmt.Errorf("%s: %s", sha1, err) } } if err := scanner.Err(); err != nil { return err } return tx.Commit() } // --- Check ------------------------------------------------------------------- func isValidSHA1(hash string) bool { if len(hash) != sha1.Size*2 || strings.ToLower(hash) != hash { return false } if _, err := hex.DecodeString(hash); err != nil { return false } return true } func hashesToFileListing(root, suffix string, hashes []string) []string { // Note that we're semi-duplicating {image,thumb}Path(). paths := []string{root} for _, hash := range hashes { dir := filepath.Join(root, hash[:2]) paths = append(paths, dir, filepath.Join(dir, hash+suffix)) } slices.Sort(paths) return slices.Compact(paths) } func collectFileListing(root string) (paths []string, err error) { err = filepath.WalkDir(root, func(path string, d fs.DirEntry, err error) error { paths = append(paths, path) return err }) // Even though it should already be sorted somehow. slices.Sort(paths) return } func checkFiles(root, suffix string, hashes []string) (bool, []string, error) { db := hashesToFileListing(root, suffix, hashes) fs, err := collectFileListing(root) if err != nil { return false, nil, err } iDB, iFS, ok, intersection := 0, 0, true, []string{} for iDB < len(db) && iFS < len(fs) { if db[iDB] == fs[iFS] { intersection = append(intersection, db[iDB]) iDB++ iFS++ } else if db[iDB] < fs[iFS] { ok = false fmt.Printf("only in DB: %s\n", db[iDB]) iDB++ } else { ok = false fmt.Printf("only in FS: %s\n", fs[iFS]) iFS++ } } for _, path := range db[iDB:] { ok = false fmt.Printf("only in DB: %s\n", path) } for _, path := range fs[iFS:] { ok = false fmt.Printf("only in FS: %s\n", path) } return ok, intersection, nil } // cmdCheck carries out various database consistency checks. func cmdCheck(args []string) error { if len(args) != 1 { return errors.New("usage: GD") } if err := openDB(args[0]); err != nil { return err } // Check if hashes are in the right format. log.Println("checking image hashes") allSHA1, err := dbCollectStrings(`SELECT sha1 FROM image`) if err != nil { return err } ok := true for _, hash := range allSHA1 { if !isValidSHA1(hash) { ok = false fmt.Printf("invalid image SHA1: %s\n", hash) } } // This is, rather obviously, just a strict subset. // Although it doesn't run in the same transaction. thumbSHA1, err := dbCollectStrings(`SELECT sha1 FROM image WHERE thumbw IS NOT NULL OR thumbh IS NOT NULL`) if err != nil { return err } // This somewhat duplicates {image,thumb}Path(). log.Println("checking SQL against filesystem") okImages, intersection, err := checkFiles( filepath.Join(galleryDirectory, nameOfImageRoot), "", allSHA1) if err != nil { return err } okThumbs, _, err := checkFiles( filepath.Join(galleryDirectory, nameOfThumbRoot), ".webp", thumbSHA1) if err != nil { return err } if !okImages || !okThumbs { ok = false } // NOTE: We could also compare mtime, and on mismatch the current SHA1, // though that's more of a "sync" job. log.Println("checking for dead symlinks") for _, path := range intersection { if _, err := os.Stat(path); err != nil { ok = false fmt.Printf("%s: %s\n", path, err) } } if !ok { return errors.New("detected inconsistencies") } return nil } // --- Thumbnailing ------------------------------------------------------------ func makeThumbnail(pathImage, pathThumb string) (int, int, error) { thumbDirname, _ := filepath.Split(pathThumb) if err := os.MkdirAll(thumbDirname, 0755); err != nil { return 0, 0, err } // Create a normalized thumbnail. Since we don't particularly need // any complex processing, such as surrounding of metadata, // simply push it through ImageMagick. // // - http://www.ericbrasseur.org/gamma.html // - https://www.imagemagick.org/Usage/thumbnails/ // - https://imagemagick.org/script/command-line-options.php#layers // // "info:" output is written for each frame, which is why we delete // all of them but the first one beforehands. // // TODO: See if we can optimize resulting WebP animations. // (Do -layers optimize* apply to this format at all?) cmd := exec.Command("convert", "-limit", "thread", "1", pathImage, "-coalesce", "-colorspace", "RGB", "-auto-orient", "-strip", "-resize", "256x128>", "-colorspace", "sRGB", "-format", "%w %h", "+write", pathThumb, "-delete", "1--1", "info:") out, err := cmd.Output() if err != nil { return 0, 0, err } var w, h int _, err = fmt.Fscanf(bytes.NewReader(out), "%d %d", &w, &h) return w, h, err } func makeThumbnailFor(sha1 string) error { pathImage := imagePath(sha1) pathThumb := thumbPath(sha1) w, h, err := makeThumbnail(pathImage, pathThumb) if err != nil { return err } _, err = db.Exec(`UPDATE image SET thumbw = ?, thumbh = ? WHERE sha1 = ?`, w, h, sha1) return err } // cmdThumbnail generates missing thumbnails, in parallel. func cmdThumbnail(args []string) error { if len(args) < 1 { return errors.New("usage: GD [SHA1...]") } if err := openDB(args[0]); err != nil { return err } hexSHA1 := args[1:] if len(hexSHA1) == 0 { // Get all unique images in the database with no thumbnail. var err error hexSHA1, err = dbCollectStrings(`SELECT sha1 FROM image WHERE thumbw IS NULL OR thumbh IS NULL`) if err != nil { return err } } pb := newProgressBar(len(hexSHA1)) defer pb.Stop() ctx, cancel := context.WithCancelCause(context.Background()) wg := sync.WaitGroup{} for _, sha1 := range hexSHA1 { if taskSemaphore.acquire(ctx) != nil { break } wg.Add(1) go func(sha1 string) { defer taskSemaphore.release() defer wg.Done() if err := makeThumbnailFor(sha1); err != nil { cancel(err) } else { pb.Step() } }(sha1) } wg.Wait() if ctx.Err() != nil { return context.Cause(ctx) } return nil } // --- Perceptual hash --------------------------------------------------------- func makeDhash(hasher, pathThumb string) (uint64, error) { out, err := exec.Command(hasher, pathThumb).Output() if err != nil { return 0, err } var hash uint64 _, err = fmt.Fscanf(bytes.NewReader(out), "%x", &hash) return hash, err } // cmdDhash generates perceptual hash from thumbnails. func cmdDhash(args []string) error { if len(args) < 1 { return errors.New("usage: GD HASHER [SHA1...]") } if err := openDB(args[0]); err != nil { return err } hasher, hexSHA1 := args[1], args[2:] if len(hexSHA1) == 0 { var err error hexSHA1, err = dbCollectStrings(` SELECT sha1 FROM image WHERE dhash IS NULL`) if err != nil { return err } } pb := newProgressBar(len(hexSHA1)) defer pb.Stop() // TODO: Also run the hasher in parallel, once it becomes a problem. // And/or run it in batches, since start-up time of the hasher // poses considerable overhead with large amounts of images. for _, sha1 := range hexSHA1 { pathThumb := thumbPath(sha1) hash, err := makeDhash(hasher, pathThumb) if err != nil { return err } _, err = db.Exec(`UPDATE image SET dhash = ? WHERE sha1 = ?`, int64(hash), sha1) if err != nil { return err } pb.Step() } return nil } // --- Main -------------------------------------------------------------------- var commands = map[string]struct { handler func(args []string) error }{ "init": {cmdInit}, "run": {cmdRun}, "import": {cmdImport}, "tag": {cmdTag}, "sync": {cmdSync}, "check": {cmdCheck}, "thumbnail": {cmdThumbnail}, "dhash": {cmdDhash}, } func main() { if len(os.Args) <= 2 { log.Fatalln("Missing arguments") } cmd, ok := commands[os.Args[1]] if !ok { log.Fatalln("Unknown command: " + os.Args[1]) } taskSemaphore = newSemaphore(runtime.NumCPU()) err := cmd.handler(os.Args[2:]) // Note that the database object has a closing finalizer, // we just additionally print any errors coming from there. if db != nil { if err := db.Close(); err != nil { log.Println(err) } } if err != nil { log.Fatalln(err) } }