From 054078908a1e4c7429ea0f5a3a0605addfccc46c Mon Sep 17 00:00:00 2001
From: Přemysl Eric Janouch
Date: Fri, 8 Dec 2023 02:16:04 +0100
Subject: Initial commit
---
main.go | 2497 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
1 file changed, 2497 insertions(+)
create mode 100644 main.go
(limited to 'main.go')
diff --git a/main.go b/main.go
new file mode 100644
index 0000000..95bb502
--- /dev/null
+++ b/main.go
@@ -0,0 +1,2497 @@
+package main
+
+import (
+ "bufio"
+ "bytes"
+ "context"
+ "crypto/sha1"
+ "database/sql"
+ "encoding/hex"
+ "encoding/json"
+ "errors"
+ "flag"
+ "fmt"
+ "html/template"
+ "image"
+ "image/color"
+ "io"
+ "io/fs"
+ "log"
+ "math"
+ "math/bits"
+ "net"
+ "net/http"
+ "os"
+ "os/exec"
+ "os/signal"
+ "path/filepath"
+ "regexp"
+ "runtime"
+ "slices"
+ "sort"
+ "strconv"
+ "strings"
+ "sync"
+ "sync/atomic"
+ "syscall"
+ "time"
+
+ "github.com/mattn/go-sqlite3"
+ "golang.org/x/image/draw"
+ "golang.org/x/image/webp"
+)
+
+var (
+ db *sql.DB // sqlite database
+ galleryDirectory string // gallery directory
+
+ // taskSemaphore limits parallel computations.
+ taskSemaphore semaphore
+)
+
+const (
+ nameOfDB = "gallery.db"
+ nameOfImageRoot = "images"
+ nameOfThumbRoot = "thumbs"
+)
+
+// hammingDistance counts how many bits differ between the two 64-bit
+// values, treating them as raw bit patterns. It is registered as the
+// SQL function hamming() in init.
+func hammingDistance(a, b int64) int {
+	difference := uint64(a) ^ uint64(b)
+	return bits.OnesCount64(difference)
+}
+
+// init registers a SQLite driver variant exposing the hamming() SQL
+// function, which is used for perceptual-hash (dhash) comparisons.
+func init() {
+	sql.Register("sqlite3_custom", &sqlite3.SQLiteDriver{
+		ConnectHook: func(conn *sqlite3.SQLiteConn) error {
+			return conn.RegisterFunc("hamming", hammingDistance, true /*pure*/)
+		},
+	})
+}
+
+// openDB opens the SQLite database stored inside the gallery directory
+// and remembers the directory for later path resolution.
+func openDB(directory string) error {
+	var err error
+	// Keep the DSN query string outside filepath.Join: the original code
+	// joined it as part of the filename, running path cleaning over the
+	// connection parameters (and breaking the URI on systems that use
+	// backslash separators).
+	db, err = sql.Open("sqlite3_custom", "file:"+
+		filepath.Join(directory, nameOfDB)+
+		"?_foreign_keys=1&_busy_timeout=1000")
+	galleryDirectory = directory
+	return err
+}
+
+// imagePath returns the filesystem location of the full-size image with
+// the given content hash, sharded by its first two hex digits.
+func imagePath(sha1 string) string {
+	return filepath.Join(galleryDirectory,
+		nameOfImageRoot, sha1[:2], sha1)
+}
+
+// thumbPath returns the filesystem location of the WebP thumbnail for
+// the image with the given content hash.
+func thumbPath(sha1 string) string {
+	return filepath.Join(galleryDirectory,
+		nameOfThumbRoot, sha1[:2], sha1+".webp")
+}
+
+// dbCollectStrings runs the given query against the global database and
+// collects the first column of every row into a slice of strings.
+// The result is non-nil even when there are no rows.
+func dbCollectStrings(query string, a ...any) ([]string, error) {
+	rows, err := db.Query(query, a...)
+	if err != nil {
+		return nil, err
+	}
+	defer rows.Close()
+
+	collected := []string{}
+	for rows.Next() {
+		var value string
+		if err := rows.Scan(&value); err != nil {
+			return nil, err
+		}
+		collected = append(collected, value)
+	}
+	if err := rows.Err(); err != nil {
+		return nil, err
+	}
+	return collected, nil
+}
+
+// - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
+
+// idForDirectoryPath resolves a directory path to its node ID in the
+// database, descending one component at a time. When create is true,
+// missing components are inserted along the way; otherwise the lookup
+// fails with sql.ErrNoRows. An empty path resolves to 0, the implicit
+// root, which has no node row of its own.
+func idForDirectoryPath(tx *sql.Tx, path []string, create bool) (int64, error) {
+	var parent sql.NullInt64
+	for _, name := range path {
+		// "parent IS ?" also matches NULL for the top level.
+		if err := tx.QueryRow(`SELECT id FROM node
+			WHERE parent IS ? AND name = ? AND sha1 IS NULL`,
+			parent, name).Scan(&parent); err == nil {
+			continue
+		} else if !errors.Is(err, sql.ErrNoRows) {
+			return 0, err
+		} else if !create {
+			return 0, err
+		}
+
+		// This fails when trying to override a leaf node.
+		// That needs special handling.
+		if result, err := tx.Exec(
+			`INSERT INTO node(parent, name) VALUES (?, ?)`,
+			parent, name); err != nil {
+			return 0, err
+		} else if id, err := result.LastInsertId(); err != nil {
+			return 0, err
+		} else {
+			parent = sql.NullInt64{Int64: id, Valid: true}
+		}
+	}
+	return parent.Int64, nil
+}
+
+// decodeWebPath splits a slash-separated web path into its non-empty
+// components. Relative paths could be handled differently,
+// but right now, they're assumed to start at the root.
+func decodeWebPath(path string) []string {
+	crumbs := strings.Split(path, "/")
+	result := make([]string, 0, len(crumbs))
+	for _, crumb := range crumbs {
+		if crumb == "" {
+			continue
+		}
+		result = append(result, crumb)
+	}
+	return result
+}
+
+// --- Semaphore ---------------------------------------------------------------
+
+// semaphore is a counting semaphore built on a buffered channel.
+type semaphore chan struct{}
+
+// newSemaphore creates a semaphore admitting up to size concurrent holders.
+func newSemaphore(size int) semaphore { return make(chan struct{}, size) }
+
+// release frees up one previously acquired slot.
+func (s semaphore) release() { <-s }
+
+// acquire obtains one slot, or returns the context's error once the
+// context has been cancelled.
+func (s semaphore) acquire(ctx context.Context) error {
+	select {
+	case <-ctx.Done():
+		return ctx.Err()
+	case s <- struct{}{}:
+	}
+
+	// Give priority to context cancellation.
+	select {
+	case <-ctx.Done():
+		s.release()
+		return ctx.Err()
+	default:
+	}
+	return nil
+}
+
+// --- Progress bar ------------------------------------------------------------
+
+// progressBar renders a textual current/target progress line on standard
+// output. The embedded mutex serializes updates from concurrent goroutines.
+type progressBar struct {
+	sync.Mutex
+	current int
+	target  int
+}
+
+// newProgressBar creates a bar aiming for the given number of steps and
+// draws its initial state. A negative target stands for an unknown total.
+func newProgressBar(target int) *progressBar {
+	pb := &progressBar{current: 0, target: target}
+	pb.Update()
+	return pb
+}
+
+// Stop terminates the progress bar's line of output.
+func (pb *progressBar) Stop() {
+	// The minimum thing that works: just print a newline.
+	os.Stdout.WriteString("\n")
+}
+
+// Update redraws the progress line in place. Aside from the constructor,
+// callers are expected to hold the mutex.
+func (pb *progressBar) Update() {
+	if pb.target < 0 {
+		fmt.Printf("\r%d/?", pb.current)
+		return
+	}
+
+	var fraction int
+	if pb.target != 0 {
+		fraction = int(float32(pb.current) / float32(pb.target) * 100)
+	}
+
+	// Pad the current count to the target's width to keep a stable line.
+	target := fmt.Sprintf("%d", pb.target)
+	fmt.Printf("\r%*d/%s (%2d%%)", len(target), pb.current, target, fraction)
+}
+
+// Step advances the progress bar by one finished unit of work.
+func (pb *progressBar) Step() {
+	pb.Lock()
+	defer pb.Unlock()
+
+	pb.current++
+	pb.Update()
+}
+
+// Interrupt suspends the progress line while the callback runs, so the
+// callback may print without mangling terminal output.
+func (pb *progressBar) Interrupt(callback func()) {
+	pb.Lock()
+	defer pb.Unlock()
+	pb.Stop()
+	defer pb.Update()
+
+	callback()
+}
+
+// --- Parallelization ---------------------------------------------------------
+
+// parallelFunc processes one item, optionally returning a non-fatal
+// message to be logged, or an error that aborts the whole run.
+type parallelFunc func(item string) (message string, err error)
+
+// parallelize runs the callback in parallel on a list of strings,
+// reporting progress and any non-fatal messages. Concurrency is bounded
+// by taskSemaphore; the first callback error cancels the remaining work
+// and is returned.
+func parallelize(items []string, callback parallelFunc) error {
+	pb := newProgressBar(len(items))
+	defer pb.Stop()
+
+	ctx, cancel := context.WithCancelCause(context.Background())
+	// Release the context's resources on the success path as well
+	// (the original never called cancel unless a task failed).
+	defer cancel(nil)
+
+	wg := sync.WaitGroup{}
+	for _, item := range items {
+		// Acquisition only fails on cancellation, i.e., a task error.
+		if taskSemaphore.acquire(ctx) != nil {
+			break
+		}
+
+		wg.Add(1)
+		go func(item string) {
+			defer taskSemaphore.release()
+			defer wg.Done()
+			if message, err := callback(item); err != nil {
+				cancel(err)
+			} else if message != "" {
+				pb.Interrupt(func() { log.Printf("%s: %s\n", item, message) })
+			}
+			pb.Step()
+		}(item)
+	}
+	wg.Wait()
+	if ctx.Err() != nil {
+		return context.Cause(ctx)
+	}
+	return nil
+}
+
+// --- Initialization ----------------------------------------------------------
+
+// cmdInit initializes a "gallery directory" that contains gallery.sqlite,
+// images, thumbs.
+// It expects exactly one argument: the directory to initialize.
+func cmdInit(fs *flag.FlagSet, args []string) error {
+	if err := fs.Parse(args); err != nil {
+		return err
+	}
+	if fs.NArg() != 1 {
+		return errWrongUsage
+	}
+	if err := openDB(fs.Arg(0)); err != nil {
+		return err
+	}
+
+	// initializeSQL is defined elsewhere in the file;
+	// presumably it sets up the database schema.
+	if _, err := db.Exec(initializeSQL); err != nil {
+		return err
+	}
+
+	// XXX: There's technically no reason to keep images as symlinks,
+	// we might just keep absolute paths in the database as well.
+	if err := os.MkdirAll(
+		filepath.Join(galleryDirectory, nameOfImageRoot), 0755); err != nil {
+		return err
+	}
+	if err := os.MkdirAll(
+		filepath.Join(galleryDirectory, nameOfThumbRoot), 0755); err != nil {
+		return err
+	}
+	return nil
+}
+
+// --- Web ---------------------------------------------------------------------
+
+var hashRE = regexp.MustCompile(`^/.*?/([0-9a-f]{40})$`)
+var staticHandler http.Handler
+
+var page = template.Must(template.New("/").Parse(`
+ Gallery
+
+
+
+
+
+
+
+`))
+
+// handleRequest serves the UI page at the root and delegates everything
+// else to the static file handler.
+func handleRequest(w http.ResponseWriter, r *http.Request) {
+	if r.URL.Path != "/" {
+		staticHandler.ServeHTTP(w, r)
+		return
+	}
+	if err := page.Execute(w, nil); err != nil {
+		log.Println(err)
+	}
+}
+
+// handleImages serves full-size image files addressed by their SHA-1
+// content hash (registered under /image/ in cmdWeb).
+func handleImages(w http.ResponseWriter, r *http.Request) {
+	if m := hashRE.FindStringSubmatch(r.URL.Path); m == nil {
+		http.NotFound(w, r)
+	} else {
+		http.ServeFile(w, r, imagePath(m[1]))
+	}
+}
+
+// handleThumbs serves thumbnail files addressed by their SHA-1 content
+// hash (registered under /thumb/ in cmdWeb).
+func handleThumbs(w http.ResponseWriter, r *http.Request) {
+	if m := hashRE.FindStringSubmatch(r.URL.Path); m == nil {
+		http.NotFound(w, r)
+	} else {
+		http.ServeFile(w, r, thumbPath(m[1]))
+	}
+}
+
+// - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
+
+func getSubdirectories(tx *sql.Tx, parent int64) (names []string, err error) {
+ return dbCollectStrings(`SELECT name FROM node
+ WHERE IFNULL(parent, 0) = ? AND sha1 IS NULL`, parent)
+}
+
+// webEntry is a single image within a directory listing,
+// as sent to the web client.
+type webEntry struct {
+	SHA1     string `json:"sha1"`
+	Name     string `json:"name"`
+	Modified int64  `json:"modified"`
+	ThumbW   int64  `json:"thumbW"`
+	ThumbH   int64  `json:"thumbH"`
+}
+
+// getSubentries returns all images directly contained in a DB node.
+func getSubentries(tx *sql.Tx, parent int64) (entries []webEntry, err error) {
+	rows, err := tx.Query(`
+		SELECT i.sha1, n.name, n.mtime, IFNULL(i.thumbw, 0), IFNULL(i.thumbh, 0)
+		FROM node AS n
+		JOIN image AS i ON n.sha1 = i.sha1
+		WHERE n.parent = ?`, parent)
+	if err != nil {
+		return nil, err
+	}
+	defer rows.Close()
+
+	entries = []webEntry{}
+	for rows.Next() {
+		var e webEntry
+		if err = rows.Scan(
+			&e.SHA1, &e.Name, &e.Modified, &e.ThumbW, &e.ThumbH); err != nil {
+			return nil, err
+		}
+		entries = append(entries, e)
+	}
+	return entries, rows.Err()
+}
+
+// handleAPIBrowse handles the /api/browse endpoint: it lists the
+// subdirectories and image entries within the requested gallery path.
+func handleAPIBrowse(w http.ResponseWriter, r *http.Request) {
+	var params struct {
+		Path string
+	}
+	if err := json.NewDecoder(r.Body).Decode(&params); err != nil {
+		http.Error(w, err.Error(), http.StatusBadRequest)
+		return
+	}
+
+	var result struct {
+		Subdirectories []string   `json:"subdirectories"`
+		Entries        []webEntry `json:"entries"`
+	}
+
+	tx, err := db.Begin()
+	if err != nil {
+		http.Error(w, err.Error(), http.StatusInternalServerError)
+		return
+	}
+	// Read-only usage; Rollback merely releases the transaction.
+	defer tx.Rollback()
+
+	parent, err := idForDirectoryPath(tx, decodeWebPath(params.Path), false)
+	if err != nil {
+		http.Error(w, err.Error(), http.StatusNotFound)
+		return
+	}
+
+	result.Subdirectories, err = getSubdirectories(tx, parent)
+	if err != nil {
+		http.Error(w, err.Error(), http.StatusInternalServerError)
+		return
+	}
+	result.Entries, err = getSubentries(tx, parent)
+	if err != nil {
+		http.Error(w, err.Error(), http.StatusInternalServerError)
+		return
+	}
+	if err := json.NewEncoder(w).Encode(result); err != nil {
+		log.Println(err)
+	}
+}
+
+// - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
+
+// webTagNamespace is the JSON description of one tag namespace,
+// mapping each tag name to its assignment count.
+type webTagNamespace struct {
+	Description string           `json:"description"`
+	Tags        map[string]int64 `json:"tags"`
+}
+
+// getTags returns, for one tag namespace, how many times each of its
+// tags has been assigned (zero for unused tags, via the LEFT JOIN).
+func getTags(nsID int64) (result map[string]int64, err error) {
+	rows, err := db.Query(`
+		SELECT t.name, COUNT(ta.tag) AS count
+		FROM tag AS t
+		LEFT JOIN tag_assignment AS ta ON t.id = ta.tag
+		WHERE t.space = ?
+		GROUP BY t.id`, nsID)
+	if err != nil {
+		return
+	}
+	defer rows.Close()
+
+	result = make(map[string]int64)
+	for rows.Next() {
+		var (
+			name  string
+			count int64
+		)
+		if err = rows.Scan(&name, &count); err != nil {
+			return
+		}
+		result[name] = count
+	}
+	return result, rows.Err()
+}
+
+// getTagNamespaces collects tag namespaces together with their tags;
+// a non-nil match restricts the result to that one namespace name.
+func getTagNamespaces(match *string) (
+	result map[string]webTagNamespace, err error) {
+	var rows *sql.Rows
+	if match != nil {
+		rows, err = db.Query(`SELECT id, name, IFNULL(description, '')
+			FROM tag_space WHERE name = ?`, *match)
+	} else {
+		rows, err = db.Query(`SELECT id, name, IFNULL(description, '')
+			FROM tag_space`)
+	}
+	if err != nil {
+		return
+	}
+	defer rows.Close()
+
+	result = make(map[string]webTagNamespace)
+	for rows.Next() {
+		var (
+			id   int64
+			name string
+			ns   webTagNamespace
+		)
+		if err = rows.Scan(&id, &name, &ns.Description); err != nil {
+			return
+		}
+		if ns.Tags, err = getTags(id); err != nil {
+			return
+		}
+		result[name] = ns
+	}
+	return result, rows.Err()
+}
+
+// handleAPITags handles the /api/tags endpoint: it returns tag
+// namespaces, optionally filtered to a single one.
+func handleAPITags(w http.ResponseWriter, r *http.Request) {
+	var params struct {
+		Namespace *string
+	}
+	if err := json.NewDecoder(r.Body).Decode(&params); err != nil {
+		http.Error(w, err.Error(), http.StatusBadRequest)
+		return
+	}
+
+	result, err := getTagNamespaces(params.Namespace)
+	if err != nil {
+		http.Error(w, err.Error(), http.StatusInternalServerError)
+		return
+	}
+
+	if err := json.NewEncoder(w).Encode(result); err != nil {
+		log.Println(err)
+	}
+}
+
+// - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
+
+// webDuplicateImage is one image of a duplicate group.
+// NOTE: the "occurences" misspelling is part of the JSON API; keep it.
+type webDuplicateImage struct {
+	SHA1       string `json:"sha1"`
+	ThumbW     int64  `json:"thumbW"`
+	ThumbH     int64  `json:"thumbH"`
+	Occurences int64  `json:"occurences"`
+}
+
+// A hamming distance of zero (direct dhash match) will be more than sufficient.
+//
+// "duplicated" collects dhashes shared by several images; "multipathed"
+// collects images that occur under several paths without such a dhash.
+const duplicatesCTE = `WITH
+	duplicated(dhash, count) AS (
+		SELECT dhash, COUNT(*) AS count FROM image
+		WHERE dhash IS NOT NULL
+		GROUP BY dhash HAVING count > 1
+	),
+	multipathed(sha1, count) AS (
+		SELECT n.sha1, COUNT(*) AS count FROM node AS n
+		JOIN image AS i ON i.sha1 = n.sha1
+		WHERE i.dhash IS NULL
+			OR i.dhash NOT IN (SELECT dhash FROM duplicated)
+		GROUP BY n.sha1 HAVING count > 1
+	)
+`
+
+// getDuplicatesSimilar lists, through the caller's prepared statement,
+// all occurrences of images sharing the given dhash.
+func getDuplicatesSimilar(stmt *sql.Stmt, dhash int64) (
+	result []webDuplicateImage, err error) {
+	rows, err := stmt.Query(dhash)
+	if err != nil {
+		return nil, err
+	}
+	defer rows.Close()
+
+	result = []webDuplicateImage{}
+	for rows.Next() {
+		var image webDuplicateImage
+		if err = rows.Scan(&image.SHA1, &image.ThumbW, &image.ThumbH,
+			&image.Occurences); err != nil {
+			return nil, err
+		}
+		result = append(result, image)
+	}
+	return result, rows.Err()
+}
+
+// getDuplicates1 appends one group per duplicated dhash.
+func getDuplicates1(result [][]webDuplicateImage) (
+	[][]webDuplicateImage, error) {
+	stmt, err := db.Prepare(`
+		SELECT i.sha1, IFNULL(i.thumbw, 0), IFNULL(i.thumbh, 0),
+			COUNT(*) AS occurences
+		FROM image AS i
+		JOIN node AS n ON n.sha1 = i.sha1
+		WHERE i.dhash = ?
+		GROUP BY n.sha1`)
+	if err != nil {
+		return nil, err
+	}
+	defer stmt.Close()
+
+	rows, err := db.Query(duplicatesCTE + `SELECT dhash FROM duplicated`)
+	if err != nil {
+		return nil, err
+	}
+	defer rows.Close()
+
+	for rows.Next() {
+		var (
+			group []webDuplicateImage
+			dhash int64
+		)
+		if err = rows.Scan(&dhash); err != nil {
+			return nil, err
+		}
+		if group, err = getDuplicatesSimilar(stmt, dhash); err != nil {
+			return nil, err
+		}
+		result = append(result, group)
+	}
+	return result, rows.Err()
+}
+
+// getDuplicates2 appends single-image groups for images stored under
+// multiple paths that aren't already covered by a dhash group.
+func getDuplicates2(result [][]webDuplicateImage) (
+	[][]webDuplicateImage, error) {
+	stmt, err := db.Prepare(`
+		SELECT i.sha1, IFNULL(i.thumbw, 0), IFNULL(i.thumbh, 0),
+			COUNT(*) AS occurences
+		FROM image AS i
+		JOIN node AS n ON n.sha1 = i.sha1
+		WHERE i.sha1 = ?
+		GROUP BY n.sha1`)
+	if err != nil {
+		return nil, err
+	}
+	defer stmt.Close()
+
+	rows, err := db.Query(duplicatesCTE + `SELECT sha1 FROM multipathed`)
+	if err != nil {
+		return nil, err
+	}
+	defer rows.Close()
+
+	for rows.Next() {
+		var (
+			image webDuplicateImage
+			sha1  string
+		)
+		if err = rows.Scan(&sha1); err != nil {
+			return nil, err
+		}
+		if err := stmt.QueryRow(sha1).Scan(&image.SHA1,
+			&image.ThumbW, &image.ThumbH, &image.Occurences); err != nil {
+			return nil, err
+		}
+		result = append(result, []webDuplicateImage{image})
+	}
+	return result, rows.Err()
+}
+
+// handleAPIDuplicates handles the /api/duplicates endpoint:
+// it returns groups of images considered duplicates of each other.
+func handleAPIDuplicates(w http.ResponseWriter, r *http.Request) {
+	var params struct{}
+	if err := json.NewDecoder(r.Body).Decode(&params); err != nil {
+		http.Error(w, err.Error(), http.StatusBadRequest)
+		return
+	}
+
+	var (
+		result = [][]webDuplicateImage{}
+		err    error
+	)
+	if result, err = getDuplicates1(result); err != nil {
+		http.Error(w, err.Error(), http.StatusInternalServerError)
+		return
+	}
+	if result, err = getDuplicates2(result); err != nil {
+		http.Error(w, err.Error(), http.StatusInternalServerError)
+		return
+	}
+
+	if err := json.NewEncoder(w).Encode(result); err != nil {
+		log.Println(err)
+	}
+}
+
+// - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
+
+// webOrphanImage describes an orphaned image, as sent to the client.
+type webOrphanImage struct {
+	SHA1   string `json:"sha1"`
+	ThumbW int64  `json:"thumbW"`
+	ThumbH int64  `json:"thumbH"`
+	Tags   int64  `json:"tags"`
+}
+
+// webOrphan adds the orphan's last known path, and whatever image has
+// taken its place there, if any.
+type webOrphan struct {
+	webOrphanImage
+	LastPath    string          `json:"lastPath"`
+	Replacement *webOrphanImage `json:"replacement"`
+}
+
+// getOrphanReplacement looks up the image that now occupies the orphan's
+// last known path, returning nil when there is none.
+func getOrphanReplacement(webPath string) (*webOrphanImage, error) {
+	tx, err := db.Begin()
+	if err != nil {
+		return nil, err
+	}
+	// Read-only usage; Rollback merely releases the transaction.
+	defer tx.Rollback()
+
+	path := decodeWebPath(webPath)
+	if len(path) == 0 {
+		return nil, nil
+	}
+
+	parent, err := idForDirectoryPath(tx, path[:len(path)-1], false)
+	if err != nil {
+		return nil, err
+	}
+
+	// Query through tx: the original read went through the global
+	// connection, sidestepping the transaction opened above.
+	var image webOrphanImage
+	err = tx.QueryRow(`SELECT i.sha1,
+		IFNULL(i.thumbw, 0), IFNULL(i.thumbh, 0), COUNT(ta.sha1) AS tags
+		FROM node AS n
+		JOIN image AS i ON n.sha1 = i.sha1
+		LEFT JOIN tag_assignment AS ta ON n.sha1 = ta.sha1
+		WHERE n.parent = ? AND n.name = ?
+		GROUP BY n.sha1`, parent, path[len(path)-1]).Scan(
+		&image.SHA1, &image.ThumbW, &image.ThumbH, &image.Tags)
+	if errors.Is(err, sql.ErrNoRows) {
+		return nil, nil
+	} else if err != nil {
+		return nil, err
+	}
+	return &image, nil
+}
+
+// getOrphans lists all orphaned images, together with whatever images
+// have replaced them at their last known paths.
+func getOrphans() (result []webOrphan, err error) {
+	rows, err := db.Query(`SELECT o.sha1, o.path,
+		IFNULL(i.thumbw, 0), IFNULL(i.thumbh, 0), COUNT(ta.sha1) AS tags
+		FROM orphan AS o
+		JOIN image AS i ON o.sha1 = i.sha1
+		LEFT JOIN tag_assignment AS ta ON o.sha1 = ta.sha1
+		GROUP BY o.sha1`)
+	if err != nil {
+		return nil, err
+	}
+	defer rows.Close()
+
+	result = []webOrphan{}
+	for rows.Next() {
+		var orphan webOrphan
+		if err = rows.Scan(&orphan.SHA1, &orphan.LastPath,
+			&orphan.ThumbW, &orphan.ThumbH, &orphan.Tags); err != nil {
+			return nil, err
+		}
+
+		orphan.Replacement, err = getOrphanReplacement(orphan.LastPath)
+		if err != nil {
+			return nil, err
+		}
+
+		result = append(result, orphan)
+	}
+	return result, rows.Err()
+}
+
+// handleAPIOrphans handles the /api/orphans endpoint.
+func handleAPIOrphans(w http.ResponseWriter, r *http.Request) {
+	var params struct{}
+	if err := json.NewDecoder(r.Body).Decode(&params); err != nil {
+		http.Error(w, err.Error(), http.StatusBadRequest)
+		return
+	}
+
+	result, err := getOrphans()
+	if err != nil {
+		http.Error(w, err.Error(), http.StatusInternalServerError)
+		return
+	}
+
+	if err := json.NewEncoder(w).Encode(result); err != nil {
+		log.Println(err)
+	}
+}
+
+// - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
+
+// getImageDimensions returns the stored width and height of an image.
+func getImageDimensions(sha1 string) (w int64, h int64, err error) {
+	err = db.QueryRow(`SELECT width, height FROM image WHERE sha1 = ?`,
+		sha1).Scan(&w, &h)
+	return
+}
+
+// getImagePaths returns all root-relative paths under which the image
+// is currently filed, assembled by walking node parents recursively.
+func getImagePaths(sha1 string) (paths []string, err error) {
+	rows, err := db.Query(`WITH RECURSIVE paths(parent, path) AS (
+		SELECT parent, name AS path FROM node WHERE sha1 = ?
+		UNION ALL
+		SELECT n.parent, n.name || '/' || p.path
+		FROM node AS n JOIN paths AS p ON n.id = p.parent
+	) SELECT path FROM paths WHERE parent IS NULL`, sha1)
+	if err != nil {
+		return nil, err
+	}
+	defer rows.Close()
+
+	paths = []string{}
+	for rows.Next() {
+		var path string
+		if err := rows.Scan(&path); err != nil {
+			return nil, err
+		}
+		paths = append(paths, path)
+	}
+	return paths, rows.Err()
+}
+
+// getImageTags returns the image's tags with their weights,
+// grouped by tag namespace.
+func getImageTags(sha1 string) (map[string]map[string]float32, error) {
+	rows, err := db.Query(`
+		SELECT ts.name, t.name, ta.weight FROM tag_assignment AS ta
+		JOIN tag AS t ON t.id = ta.tag
+		JOIN tag_space AS ts ON ts.id = t.space
+		WHERE ta.sha1 = ?`, sha1)
+	if err != nil {
+		return nil, err
+	}
+	defer rows.Close()
+
+	result := make(map[string]map[string]float32)
+	for rows.Next() {
+		var (
+			space, tag string
+			weight     float32
+		)
+		if err := rows.Scan(&space, &tag, &weight); err != nil {
+			return nil, err
+		}
+
+		tags := result[space]
+		if tags == nil {
+			tags = make(map[string]float32)
+			result[space] = tags
+		}
+		tags[tag] = weight
+	}
+	return result, rows.Err()
+}
+
+// handleAPIInfo handles the /api/info endpoint: it returns the
+// dimensions, paths and tags of a single image.
+func handleAPIInfo(w http.ResponseWriter, r *http.Request) {
+	var params struct {
+		SHA1 string
+	}
+	if err := json.NewDecoder(r.Body).Decode(&params); err != nil {
+		http.Error(w, err.Error(), http.StatusBadRequest)
+		return
+	}
+
+	var result struct {
+		Width  int64                         `json:"width"`
+		Height int64                         `json:"height"`
+		Paths  []string                      `json:"paths"`
+		Tags   map[string]map[string]float32 `json:"tags"`
+	}
+
+	var err error
+	result.Width, result.Height, err = getImageDimensions(params.SHA1)
+	if errors.Is(err, sql.ErrNoRows) {
+		http.Error(w, err.Error(), http.StatusNotFound)
+		return
+	} else if err != nil {
+		http.Error(w, err.Error(), http.StatusInternalServerError)
+		return
+	}
+
+	result.Paths, err = getImagePaths(params.SHA1)
+	if err != nil {
+		http.Error(w, err.Error(), http.StatusInternalServerError)
+		return
+	}
+	result.Tags, err = getImageTags(params.SHA1)
+	if err != nil {
+		http.Error(w, err.Error(), http.StatusInternalServerError)
+		return
+	}
+	if err := json.NewEncoder(w).Encode(result); err != nil {
+		log.Println(err)
+	}
+}
+
+// - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
+
+// webSimilarImage is one similarity match, including the ratio of its
+// pixel count to that of the queried image.
+type webSimilarImage struct {
+	SHA1        string   `json:"sha1"`
+	PixelsRatio float32  `json:"pixelsRatio"`
+	ThumbW      int64    `json:"thumbW"`
+	ThumbH      int64    `json:"thumbH"`
+	Paths       []string `json:"paths"`
+}
+
+// getSimilar finds all other images whose dhash differs from the given
+// one by exactly the given number of bits.
+func getSimilar(sha1 string, dhash int64, pixels int64, distance int) (
+	result []webSimilarImage, err error) {
+	// For distance ∈ {0, 1}, this query is quite inefficient.
+	// In exchange, it's generic.
+	//
+	// If there's a dhash, there should also be thumbnail dimensions,
+	// so not bothering with IFNULL on them.
+	rows, err := db.Query(`
+		SELECT sha1, width * height, IFNULL(thumbw, 0), IFNULL(thumbh, 0)
+		FROM image WHERE sha1 <> ? AND dhash IS NOT NULL
+		AND hamming(dhash, ?) = ?`, sha1, dhash, distance)
+	if err != nil {
+		return nil, err
+	}
+	defer rows.Close()
+
+	result = []webSimilarImage{}
+	for rows.Next() {
+		var (
+			match       webSimilarImage
+			matchPixels int64
+		)
+		if err = rows.Scan(&match.SHA1,
+			&matchPixels, &match.ThumbW, &match.ThumbH); err != nil {
+			return nil, err
+		}
+		if match.Paths, err = getImagePaths(match.SHA1); err != nil {
+			return nil, err
+		}
+		match.PixelsRatio = float32(matchPixels) / float32(pixels)
+		result = append(result, match)
+	}
+	return result, rows.Err()
+}
+
+// getSimilarGroups fills output with one group of similar images for
+// each perceptual distance 0 and 1.
+func getSimilarGroups(sha1 string, dhash int64, pixels int64,
+	output map[string][]webSimilarImage) error {
+	var err error
+	for distance := 0; distance <= 1; distance++ {
+		output[fmt.Sprintf("Perceptual distance %d", distance)], err =
+			getSimilar(sha1, dhash, pixels, distance)
+		if err != nil {
+			return err
+		}
+	}
+	return nil
+}
+
+// handleAPISimilar handles the /api/similar endpoint: it returns images
+// perceptually similar to the given one.
+func handleAPISimilar(w http.ResponseWriter, r *http.Request) {
+	var params struct {
+		SHA1 string
+	}
+	if err := json.NewDecoder(r.Body).Decode(&params); err != nil {
+		http.Error(w, err.Error(), http.StatusBadRequest)
+		return
+	}
+
+	var result struct {
+		Info   webSimilarImage              `json:"info"`
+		Groups map[string][]webSimilarImage `json:"groups"`
+	}
+
+	result.Info = webSimilarImage{SHA1: params.SHA1, PixelsRatio: 1}
+	if paths, err := getImagePaths(params.SHA1); err != nil {
+		http.Error(w, err.Error(), http.StatusInternalServerError)
+		return
+	} else {
+		result.Info.Paths = paths
+	}
+
+	var (
+		width, height int64
+		dhash         sql.NullInt64
+	)
+	err := db.QueryRow(`
+		SELECT width, height, dhash, IFNULL(thumbw, 0), IFNULL(thumbh, 0)
+		FROM image WHERE sha1 = ?`, params.SHA1).Scan(&width, &height, &dhash,
+		&result.Info.ThumbW, &result.Info.ThumbH)
+	if errors.Is(err, sql.ErrNoRows) {
+		http.Error(w, err.Error(), http.StatusNotFound)
+		return
+	} else if err != nil {
+		http.Error(w, err.Error(), http.StatusInternalServerError)
+		return
+	}
+
+	// Images without a dhash simply get no similarity groups.
+	result.Groups = make(map[string][]webSimilarImage)
+	if dhash.Valid {
+		if err := getSimilarGroups(
+			params.SHA1, dhash.Int64, width*height, result.Groups); err != nil {
+			http.Error(w, err.Error(), http.StatusInternalServerError)
+			return
+		}
+	}
+
+	if err := json.NewEncoder(w).Encode(result); err != nil {
+		log.Println(err)
+	}
+}
+
+// - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
+
+// NOTE: AND will mean MULTIPLY(IFNULL(ta.weight, 0)) per SHA1.
+//
+// "matches" is all images carrying the searched tag; "scoredtags" is
+// every other tag of those images, scored by its average weight.
+const searchCTE = `WITH
+	matches(sha1, thumbw, thumbh, score) AS (
+		SELECT i.sha1, i.thumbw, i.thumbh, ta.weight AS score
+		FROM tag_assignment AS ta
+		JOIN image AS i ON i.sha1 = ta.sha1
+		WHERE ta.tag = ?
+	),
+	supertags(tag) AS (
+		SELECT DISTINCT ta.tag
+		FROM tag_assignment AS ta
+		JOIN matches AS m ON m.sha1 = ta.sha1
+	),
+	scoredtags(tag, score) AS (
+		-- The cross join is a deliberate optimization,
+		-- and this query may still be really slow.
+		SELECT st.tag, AVG(IFNULL(ta.weight, 0)) AS score
+		FROM matches AS m
+		CROSS JOIN supertags AS st
+		LEFT JOIN tag_assignment AS ta
+			ON ta.sha1 = m.sha1 AND ta.tag = st.tag
+		GROUP BY st.tag
+		-- Using the column alias doesn't fail, but it also doesn't work.
+		HAVING AVG(IFNULL(ta.weight, 0)) >= 0.01
+	)
+`
+
+// webTagMatch is one image matching a tag search.
+type webTagMatch struct {
+	SHA1   string  `json:"sha1"`
+	ThumbW int64   `json:"thumbW"`
+	ThumbH int64   `json:"thumbH"`
+	Score  float32 `json:"score"`
+}
+
+// getTagMatches returns all images assigned the given tag.
+func getTagMatches(tag int64) (matches []webTagMatch, err error) {
+	rows, err := db.Query(searchCTE+`
+		SELECT sha1, IFNULL(thumbw, 0), IFNULL(thumbh, 0), score
+		FROM matches`, tag)
+	if err != nil {
+		return nil, err
+	}
+	defer rows.Close()
+
+	matches = []webTagMatch{}
+	for rows.Next() {
+		var match webTagMatch
+		if err = rows.Scan(&match.SHA1,
+			&match.ThumbW, &match.ThumbH, &match.Score); err != nil {
+			return nil, err
+		}
+		matches = append(matches, match)
+	}
+	return matches, rows.Err()
+}
+
+// webTagRelated is a tag commonly co-occurring with the searched one.
+type webTagRelated struct {
+	Tag   string  `json:"tag"`
+	Score float32 `json:"score"`
+}
+
+// getTagRelated scores tags that co-occur with the given one,
+// grouped by namespace and ordered by decreasing score.
+func getTagRelated(tag int64) (result map[string][]webTagRelated, err error) {
+	rows, err := db.Query(searchCTE+`
+		SELECT ts.name, t.name, st.score FROM scoredtags AS st
+		JOIN tag AS t ON st.tag = t.id
+		JOIN tag_space AS ts ON ts.id = t.space
+		ORDER BY st.score DESC`, tag)
+	if err != nil {
+		return nil, err
+	}
+	defer rows.Close()
+
+	result = make(map[string][]webTagRelated)
+	for rows.Next() {
+		var (
+			space string
+			r     webTagRelated
+		)
+		if err = rows.Scan(&space, &r.Tag, &r.Score); err != nil {
+			return nil, err
+		}
+		result[space] = append(result[space], r)
+	}
+	return result, rows.Err()
+}
+
+// handleAPISearch handles the /api/search endpoint. The query takes
+// the form "namespace:tag".
+func handleAPISearch(w http.ResponseWriter, r *http.Request) {
+	var params struct {
+		Query string
+	}
+	if err := json.NewDecoder(r.Body).Decode(&params); err != nil {
+		http.Error(w, err.Error(), http.StatusBadRequest)
+		return
+	}
+
+	var result struct {
+		Matches []webTagMatch              `json:"matches"`
+		Related map[string][]webTagRelated `json:"related"`
+	}
+
+	space, tag, _ := strings.Cut(params.Query, ":")
+
+	var tagID int64
+	err := db.QueryRow(`
+		SELECT t.id FROM tag AS t
+		JOIN tag_space AS ts ON t.space = ts.id
+		WHERE ts.name = ? AND t.name = ?`, space, tag).Scan(&tagID)
+	if errors.Is(err, sql.ErrNoRows) {
+		http.Error(w, err.Error(), http.StatusNotFound)
+		return
+	} else if err != nil {
+		http.Error(w, err.Error(), http.StatusInternalServerError)
+		return
+	}
+
+	if result.Matches, err = getTagMatches(tagID); err != nil {
+		http.Error(w, err.Error(), http.StatusInternalServerError)
+		return
+	}
+	if result.Related, err = getTagRelated(tagID); err != nil {
+		http.Error(w, err.Error(), http.StatusInternalServerError)
+		return
+	}
+
+	if err := json.NewEncoder(w).Encode(result); err != nil {
+		log.Println(err)
+	}
+}
+
+// - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
+
+// cmdWeb runs a web UI against GD on ADDRESS.
+// It expects two arguments: the gallery directory and the address.
+func cmdWeb(fs *flag.FlagSet, args []string) error {
+	if err := fs.Parse(args); err != nil {
+		return err
+	}
+	if fs.NArg() != 2 {
+		return errWrongUsage
+	}
+	if err := openDB(fs.Arg(0)); err != nil {
+		return err
+	}
+
+	address := fs.Arg(1)
+
+	// This separation is not strictly necessary,
+	// but having an elementary level of security doesn't hurt either.
+	staticHandler = http.FileServer(http.Dir("public"))
+
+	http.HandleFunc("/", handleRequest)
+	http.HandleFunc("/image/", handleImages)
+	http.HandleFunc("/thumb/", handleThumbs)
+	http.HandleFunc("/api/browse", handleAPIBrowse)
+	http.HandleFunc("/api/tags", handleAPITags)
+	http.HandleFunc("/api/duplicates", handleAPIDuplicates)
+	http.HandleFunc("/api/orphans", handleAPIOrphans)
+	http.HandleFunc("/api/info", handleAPIInfo)
+	http.HandleFunc("/api/similar", handleAPISimilar)
+	http.HandleFunc("/api/search", handleAPISearch)
+
+	// Log a URL that can be directly followed from the terminal.
+	host, port, err := net.SplitHostPort(address)
+	if err != nil {
+		log.Println(err)
+	} else if host == "" {
+		log.Println("http://" + net.JoinHostPort("localhost", port))
+	} else {
+		log.Println("http://" + address)
+	}
+
+	// Basic protective limits; the handlers use the default mux.
+	s := &http.Server{
+		Addr:           address,
+		ReadTimeout:    60 * time.Second,
+		WriteTimeout:   60 * time.Second,
+		MaxHeaderBytes: 32 << 10,
+	}
+	return s.ListenAndServe()
+}
+
+// --- Sync --------------------------------------------------------------------
+
+// syncFileInfo carries the result of asynchronously processing one
+// filesystem file, together with where it was meant to end up in the DB.
+type syncFileInfo struct {
+	dbID     int64  // DB node ID, or zero if there was none
+	dbParent int64  // where the file was to be stored
+	dbName   string // the name under which it was to be stored
+	fsPath   string // symlink target
+	fsMtime  int64  // last modified Unix timestamp, used a bit like an ID
+
+	err    error  // any processing error
+	sha1   string // raw content hash, empty to skip file
+	width  int    // image width in pixels
+	height int    // image height in pixels
+}
+
+// syncContext bundles the state shared by one synchronization run.
+type syncContext struct {
+	ctx  context.Context
+	tx   *sql.Tx
+	info chan syncFileInfo
+	pb   *progressBar
+
+	// Prepared statements used by syncDispose.
+	stmtOrphan     *sql.Stmt
+	stmtDisposeSub *sql.Stmt
+	stmtDisposeAll *sql.Stmt
+
+	// linked tracks which image hashes we've checked so far in the run.
+	linked map[string]struct{}
+}
+
+// syncPrintf logs a message without disturbing the progress bar.
+func syncPrintf(c *syncContext, format string, v ...any) {
+	c.pb.Interrupt(func() { log.Printf(format+"\n", v...) })
+}
+
+// - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
+
+// syncNode is the database's view of one directory entry.
+type syncNode struct {
+	dbID    int64
+	dbName  string
+	dbMtime int64
+	dbSHA1  string
+}
+
+// dbIsDir tells whether the node is a directory rather than an image.
+func (n *syncNode) dbIsDir() bool { return n.dbSHA1 == "" }
+
+// syncFile is the filesystem's view of one directory entry.
+type syncFile struct {
+	fsName  string
+	fsMtime int64
+	fsIsDir bool
+}
+
+// syncPair pairs up a DB node with its filesystem counterpart;
+// presumably either side may be nil when the other has no match.
+type syncPair struct {
+	db *syncNode
+	fs *syncFile
+}
+
+// syncGetNodes returns direct children of a DB node, ordered by name.
+// SQLite, like Go, compares strings byte-wise by default.
+// A nil result simply means the parent has no children.
+func syncGetNodes(tx *sql.Tx, dbParent int64) (nodes []syncNode, err error) {
+	// This works even for the root, which doesn't exist as a DB node.
+	rows, err := tx.Query(`SELECT id, name, IFNULL(mtime, 0), IFNULL(sha1, '')
+		FROM node WHERE IFNULL(parent, 0) = ? ORDER BY name`, dbParent)
+	if err != nil {
+		return
+	}
+	defer rows.Close()
+
+	for rows.Next() {
+		var node syncNode
+		if err = rows.Scan(&node.dbID,
+			&node.dbName, &node.dbMtime, &node.dbSHA1); err != nil {
+			return
+		}
+		nodes = append(nodes, node)
+	}
+	return nodes, rows.Err()
+}
+
+// syncGetFiles returns direct children of a FS directory, ordered by name.
+// Byte-wise name ordering matches what syncGetNodes produces.
+func syncGetFiles(fsPath string) (files []syncFile, err error) {
+	// os.ReadDir already sorts entries by filename, which removes the
+	// need for the manual Open/ReadDir/Close/sort sequence.
+	entries, err := os.ReadDir(fsPath)
+	if err != nil {
+		return
+	}
+
+	files = make([]syncFile, 0, len(entries))
+	for _, entry := range entries {
+		info, err := entry.Info()
+		if err != nil {
+			return files, err
+		}
+
+		files = append(files, syncFile{
+			fsName:  entry.Name(),
+			fsMtime: info.ModTime().Unix(),
+			fsIsDir: entry.IsDir(),
+		})
+	}
+	return
+}
+
+// - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
+
+// syncIsImage reports whether the file at path has an image/* MIME
+// type, as judged by the external xdg-mime utility.
+func syncIsImage(path string) (bool, error) {
+	out, err := exec.Command("xdg-mime", "query", "filetype", path).Output()
+	if err != nil {
+		return false, err
+	}
+
+	return bytes.HasPrefix(out, []byte("image/")), nil
+}
+
+// syncPingImage returns the pixel dimensions of the first frame of the
+// image at path, using ImageMagick's identify in single-threaded mode.
+func syncPingImage(path string) (int, int, error) {
+	out, err := exec.Command("magick", "identify", "-limit", "thread", "1",
+		"-ping", "-format", "%w %h", path+"[0]").Output()
+	if err != nil {
+		return 0, 0, err
+	}
+
+	var w, h int
+	_, err = fmt.Fscanf(bytes.NewReader(out), "%d %d", &w, &h)
+	return w, h, err
+}
+
+// syncProcess computes the dimensions and the SHA-1 content hash for
+// the file described by info. For non-image files, info.sha1 is left
+// empty, which marks them to be skipped.
+func syncProcess(c *syncContext, info *syncFileInfo) error {
+	// Skip videos, which ImageMagick can process, but we don't want it to,
+	// so that they're not converted 1:1 to WebP.
+	pathIsImage, err := syncIsImage(info.fsPath)
+	if err != nil {
+		return err
+	}
+	if !pathIsImage {
+		return nil
+	}
+
+	info.width, info.height, err = syncPingImage(info.fsPath)
+	if err != nil {
+		return err
+	}
+
+	f, err := os.Open(info.fsPath)
+	if err != nil {
+		return err
+	}
+	defer f.Close()
+
+	// We could make this at least somewhat interruptible by c.ctx,
+	// though it would still work poorly.
+	hash := sha1.New()
+	_, err = io.CopyBuffer(hash, f, make([]byte, 65536))
+	if err != nil {
+		return err
+	}
+
+	info.sha1 = hex.EncodeToString(hash.Sum(nil))
+	return nil
+}
+
// syncEnqueue runs file scanning, which can be CPU and I/O expensive,
// in parallel. The goroutine only touches the filesystem, read-only.
// The result is passed back through c.info.
func syncEnqueue(c *syncContext, info syncFileInfo) error {
	// Wait for a free slot, unless the context gets cancelled first.
	if err := taskSemaphore.acquire(c.ctx); err != nil {
		return err
	}

	// The goroutine works on its own copy of info,
	// storing any processing error inside it rather than returning it.
	go func(info syncFileInfo) {
		defer taskSemaphore.release()
		info.err = syncProcess(c, &info)
		c.info <- info
	}(info)
	return nil
}
+
// syncDequeue flushes the result queue of finished asynchronous tasks.
// It only processes what is immediately available,
// i.e., it does not wait for outstanding tasks to finish.
func syncDequeue(c *syncContext) error {
	for {
		select {
		case <-c.ctx.Done():
			return c.ctx.Err()
		case info := <-c.info:
			if err := syncPostProcess(c, info); err != nil {
				return err
			}
		default:
			// Nothing is ready right now, we're done flushing.
			return nil
		}
	}
}
+
+// syncDispose creates orphan records for the entire subtree given by nodeID
+// as appropriate, then deletes all nodes within the subtree. The subtree root
+// node is not deleted if "keepNode" is true.
+//
+// Orphans keep their thumbnail files, as evidence.
+func syncDispose(c *syncContext, nodeID int64, keepNode bool) error {
+ if _, err := c.stmtOrphan.Exec(nodeID); err != nil {
+ return err
+ }
+
+ if keepNode {
+ if _, err := c.stmtDisposeSub.Exec(nodeID); err != nil {
+ return err
+ }
+ } else {
+ if _, err := c.stmtDisposeAll.Exec(nodeID); err != nil {
+ return err
+ }
+ }
+ return nil
+}
+
+func syncImageResave(c *syncContext, path string, target string) error {
+ dirname, _ := filepath.Split(path)
+ if err := os.MkdirAll(dirname, 0755); err != nil {
+ return err
+ }
+
+ for {
+ // Try to remove anything standing in the way.
+ err := os.Remove(path)
+ if err != nil && !errors.Is(err, os.ErrNotExist) {
+ return err
+ }
+
+ // TODO: Make it possible to copy or reflink (ioctl FICLONE).
+ err = os.Symlink(target, path)
+ if err == nil || !errors.Is(err, fs.ErrExist) {
+ return err
+ }
+ }
+}
+
// syncImageSave makes sure a file exists in the image tree
// for the given SHA-1, as a symlink pointing at target,
// unless something other than a symlink is already there.
// Hashes handled during this run are cached in c.linked.
func syncImageSave(c *syncContext, sha1 string, target string) error {
	if _, ok := c.linked[sha1]; ok {
		return nil
	}

	ok, path := false, imagePath(sha1)
	if link, err := os.Readlink(path); err == nil {
		// It is a symlink: it must point at the expected target.
		ok = link == target
	} else {
		// If it exists, but it is not a symlink, let it be.
		// Even though it may not be a regular file.
		// (Readlink fails with EINVAL on non-symlinks.)
		ok = errors.Is(err, syscall.EINVAL)
	}

	if !ok {
		if err := syncImageResave(c, path, target); err != nil {
			return err
		}
	}

	c.linked[sha1] = struct{}{}
	return nil
}
+
+func syncImage(c *syncContext, info syncFileInfo) error {
+ if _, err := c.tx.Exec(`INSERT INTO image(sha1, width, height)
+ VALUES (?, ?, ?) ON CONFLICT(sha1) DO NOTHING`,
+ info.sha1, info.width, info.height); err != nil {
+ return err
+ }
+
+ return syncImageSave(c, info.sha1, info.fsPath)
+}
+
// syncPostProcess integrates the result of one scanning task
// into the database. The case comments use the notation "database state →
// filesystem state", with 0 = nothing, F = file/image, D = directory.
func syncPostProcess(c *syncContext, info syncFileInfo) error {
	defer c.pb.Step()

	// TODO: When replacing an image node (whether it has or doesn't have
	// other links to keep it alive), we could offer copying all tags,
	// though this needs another table to track it.
	// (If it's equivalent enough, the dhash will stay the same,
	// so user can resolve this through the duplicates feature.)
	switch {
	case info.err != nil:
		// * → error
		// ImageMagick failures are demoted to warning messages;
		// any other error aborts the synchronization.
		if ee, ok := info.err.(*exec.ExitError); ok {
			syncPrintf(c, "%s: %s", info.fsPath, ee.Stderr)
		} else {
			return info.err
		}
		// Proceed to dispose of the node as if the file wasn't an image.
		fallthrough

	case info.sha1 == "":
		// 0 → 0
		if info.dbID == 0 {
			return nil
		}

		// D → 0, F → 0
		// TODO: Make it possible to disable removal (for copying only?)
		return syncDispose(c, info.dbID, false /*keepNode*/)

	case info.dbID == 0:
		// 0 → F
		if err := syncImage(c, info); err != nil {
			return err
		}
		if _, err := c.tx.Exec(`INSERT INTO node(parent, name, mtime, sha1)
			VALUES (?, ?, ?, ?)`,
			info.dbParent, info.dbName, info.fsMtime, info.sha1); err != nil {
			return err
		}
		return nil

	default:
		// D → F, F → F (this statement is a no-op with the latter)
		if err := syncDispose(c, info.dbID, true /*keepNode*/); err != nil {
			return err
		}

		// Even if the hash didn't change, see comment in syncDirectoryPair().
		if err := syncImage(c, info); err != nil {
			return err
		}
		if _, err := c.tx.Exec(`UPDATE node SET mtime = ?, sha1 = ?
			WHERE id = ?`, info.fsMtime, info.sha1, info.dbID); err != nil {
			return err
		}
		return nil
	}
}
+
// syncDirectoryPair processes a single name within a directory,
// pairing up the DB node and/or the FS entry of that name.
// The case comments use the notation "database state → filesystem state",
// with 0 = nothing, F = file, D = directory.
func syncDirectoryPair(c *syncContext, dbParent int64, fsPath string,
	pair syncPair) error {
	db, fs, fsInfo := pair.db, pair.fs, syncFileInfo{dbParent: dbParent}
	if db != nil {
		fsInfo.dbID = db.dbID
	}
	if fs != nil {
		fsInfo.dbName = fs.fsName
		fsInfo.fsPath = filepath.Join(fsPath, fs.fsName)
		fsInfo.fsMtime = fs.fsMtime
	}

	switch {
	case db == nil && fs == nil:
		// 0 → 0, unreachable.

	case db == nil && fs.fsIsDir:
		// 0 → D
		var id int64
		if result, err := c.tx.Exec(`INSERT INTO node(parent, name)
			VALUES (?, ?)`, dbParent, fs.fsName); err != nil {
			return err
		} else if id, err = result.LastInsertId(); err != nil {
			return err
		}
		return syncDirectory(c, id, fsInfo.fsPath)

	case db == nil:
		// 0 → F (or 0 → 0)
		return syncEnqueue(c, fsInfo)

	case fs == nil:
		// D → 0, F → 0
		// TODO: Make it possible to disable removal (for copying only?)
		return syncDispose(c, db.dbID, false /*keepNode*/)

	case db.dbIsDir() && fs.fsIsDir:
		// D → D
		return syncDirectory(c, db.dbID, fsInfo.fsPath)

	case db.dbIsDir():
		// D → F (or D → 0)
		return syncEnqueue(c, fsInfo)

	case fs.fsIsDir:
		// F → D
		if err := syncDispose(c, db.dbID, true /*keepNode*/); err != nil {
			return err
		}
		if _, err := c.tx.Exec(`UPDATE node
			SET mtime = NULL, sha1 = NULL WHERE id = ?`, db.dbID); err != nil {
			return err
		}
		return syncDirectory(c, db.dbID, fsInfo.fsPath)

	case db.dbMtime != fs.fsMtime:
		// F → F (or F → 0)
		// Assuming that any content modifications change the timestamp.
		return syncEnqueue(c, fsInfo)

	default:
		// F → F
		// Try to fix symlinks, to handle the following situations:
		// 1. Image A occurs in paths 1 and 2, we use a symlink to path 1,
		//    and path 1 is removed from the filesystem:
		//    path 2 would not resolve if the mtime didn't change.
		// 2. Image A occurs in paths 1 and 2, we use a symlink to path 1,
		//    and path 1 is changed:
		//    path 2 would resolve to the wrong file.
		// This may relink images with multiple occurences unnecessarily,
		// but it will always fix the roots that are being synced.
		if err := syncImageSave(c, db.dbSHA1, fsInfo.fsPath); err != nil {
			return err
		}
	}
	return nil
}
+
// syncDirectory reconciles the database subtree rooted at dbParent
// with the filesystem directory at fsPath, enqueuing scans for files,
// and recursing into subdirectories.
func syncDirectory(c *syncContext, dbParent int64, fsPath string) error {
	db, err := syncGetNodes(c.tx, dbParent)
	if err != nil {
		return err
	}

	fs, err := syncGetFiles(fsPath)
	if err != nil {
		return err
	}

	// This would not be fatal, but it has annoying consequences.
	if _, ok := slices.BinarySearchFunc(fs, syncFile{fsName: nameOfDB},
		func(a, b syncFile) int {
			return strings.Compare(a.fsName, b.fsName)
		}); ok {
		syncPrintf(c, "%s may be a gallery directory, treating as empty",
			fsPath)
		fs = nil
	}

	// Convert differences to a form more convenient for processing.
	// Both listings are sorted by name, so this is an ordinary list merge.
	iDB, iFS, pairs := 0, 0, []syncPair{}
	for iDB < len(db) && iFS < len(fs) {
		if db[iDB].dbName == fs[iFS].fsName {
			pairs = append(pairs, syncPair{&db[iDB], &fs[iFS]})
			iDB++
			iFS++
		} else if db[iDB].dbName < fs[iFS].fsName {
			pairs = append(pairs, syncPair{&db[iDB], nil})
			iDB++
		} else {
			pairs = append(pairs, syncPair{nil, &fs[iFS]})
			iFS++
		}
	}
	for i := range db[iDB:] {
		pairs = append(pairs, syncPair{&db[iDB+i], nil})
	}
	for i := range fs[iFS:] {
		pairs = append(pairs, syncPair{nil, &fs[iFS+i]})
	}

	for _, pair := range pairs {
		// Flush finished tasks before potentially enqueuing more.
		if err := syncDequeue(c); err != nil {
			return err
		}
		if err := syncDirectoryPair(c, dbParent, fsPath, pair); err != nil {
			return err
		}
	}
	return nil
}
+
// syncRoot synchronizes a single root: the database subtree at dbPath
// against the filesystem tree at fsPath. It waits for all spawned tasks,
// and prunes directories emptied by the synchronization.
func syncRoot(c *syncContext, dbPath []string, fsPath string) error {
	// TODO: Support synchronizing individual files.
	// This can only be treated as 0 → F, F → F, or D → F, that is,
	// a variation on current syncEnqueue(), but dbParent must be nullable.

	// Figure out a database root (not trying to convert F → D on conflict,
	// also because we don't know yet if the argument is a directory).
	//
	// Synchronizing F → D or * → F are special cases not worth implementing.
	dbParent, err := idForDirectoryPath(c.tx, dbPath, true)
	if err != nil {
		return err
	}
	if err := syncDirectory(c, dbParent, fsPath); err != nil {
		return err
	}

	// Wait for all tasks to finish, and process the results of their work.
	// Acquiring the whole semaphore guarantees no task is still running.
	for i := 0; i < cap(taskSemaphore); i++ {
		if err := taskSemaphore.acquire(c.ctx); err != nil {
			return err
		}
	}
	if err := syncDequeue(c); err != nil {
		return err
	}

	// This is not our semaphore, so prepare it for the next user.
	for i := 0; i < cap(taskSemaphore); i++ {
		taskSemaphore.release()
	}

	// Delete empty directories, from the bottom of the tree up to,
	// but not including, the inserted root.
	//
	// We need to do this at the end due to our recursive handling,
	// as well as because of asynchronous file filtering.
	stmt, err := c.tx.Prepare(`
		WITH RECURSIVE subtree(id, parent, sha1, level) AS (
			SELECT id, parent, sha1, 1 FROM node WHERE id = ?
			UNION ALL
			SELECT n.id, n.parent, n.sha1, s.level + 1
			FROM node AS n JOIN subtree AS s ON n.parent = s.id
		) DELETE FROM node WHERE id IN (
			SELECT id FROM subtree WHERE level <> 1 AND sha1 IS NULL
			AND id NOT IN (SELECT parent FROM node WHERE parent IS NOT NULL)
		)`)
	if err != nil {
		return err
	}

	// Each pass deletes leaves only, which may empty their parents,
	// so repeat until no more rows are affected.
	for {
		if result, err := stmt.Exec(dbParent); err != nil {
			return err
		} else if n, err := result.RowsAffected(); err != nil {
			return err
		} else if n == 0 {
			return nil
		}
	}
}
+
// syncPath pairs a database path with a filesystem path,
// as resolved from a command line argument by syncResolveRoots.
type syncPath struct {
	db []string // database path, in terms of nodes
	fs string   // normalized filesystem path
}
+
// syncResolveRoots normalizes filesystem paths given in command line arguments,
// and figures out a database path for each. Duplicates are skipped or rejected.
//
// With fullpaths set, the whole absolute filesystem path becomes the
// database path; otherwise just its basename does.
func syncResolveRoots(args []string, fullpaths bool) (
	roots []*syncPath, err error) {
	for i := range args {
		fs, err := filepath.Abs(filepath.Clean(args[i]))
		if err != nil {
			return nil, err
		}

		roots = append(roots,
			&syncPath{decodeWebPath(filepath.ToSlash(fs)), fs})
	}

	if fullpaths {
		// Filter out duplicates. In this case, they're just duplicated work.
		slices.SortFunc(roots, func(a, b *syncPath) int {
			return strings.Compare(a.fs, b.fs)
		})
		// Also remove paths nested under a previously kept path.
		roots = slices.CompactFunc(roots, func(a, b *syncPath) bool {
			if a.fs != b.fs && !strings.HasPrefix(b.fs, a.fs+"/") {
				return false
			}
			log.Printf("asking to sync path twice: %s\n", b.fs)
			return true
		})
	} else {
		// Keep just the basenames.
		for _, path := range roots {
			if len(path.db) > 0 {
				path.db = path.db[len(path.db)-1:]
			}
		}

		// Different filesystem paths mapping to the same DB location
		// are definitely a problem we would like to avoid,
		// otherwise we don't care.
		slices.SortFunc(roots, func(a, b *syncPath) int {
			return slices.Compare(a.db, b.db)
		})
		for i := 1; i < len(roots); i++ {
			if slices.Equal(roots[i-1].db, roots[i].db) {
				return nil, fmt.Errorf("duplicate root: %v", roots[i].db)
			}
		}
	}
	return
}
+
// disposeCTE defines helper tables for subtree removal, given a node ID:
// "root" walks from that node up to the tree root, reconstructing its path;
// "children" then enumerates the node's entire subtree with full paths;
// "removed" groups the subtree's contents by image SHA-1;
// and "orphaned" narrows that down to images all of whose references
// lie within the subtree, i.e., those that would lose their last reference.
const disposeCTE = `WITH RECURSIVE
	root(id, sha1, parent, path) AS (
		SELECT id, sha1, parent, name FROM node WHERE id = ?
		UNION ALL
		SELECT r.id, r.sha1, n.parent, n.name || '/' || r.path
		FROM node AS n JOIN root AS r ON n.id = r.parent
	),
	children(id, sha1, path, level) AS (
		SELECT id, sha1, path, 1 FROM root WHERE parent IS NULL
		UNION ALL
		SELECT n.id, n.sha1, c.path || '/' || n.name, c.level + 1
		FROM node AS n JOIN children AS c ON n.parent = c.id
	),
	removed(sha1, count, path) AS (
		SELECT sha1, COUNT(*) AS count, MIN(path) AS path
		FROM children
		GROUP BY sha1
	),
	orphaned(sha1, path, count, total) AS (
		SELECT r.sha1, r.path, r.count, COUNT(*) AS total
		FROM removed AS r
		JOIN node ON node.sha1 = r.sha1
		GROUP BY node.sha1
		HAVING count = total
	)`
+
// cmdSync ensures the given (sub)roots are accurately reflected
// in the database.
func cmdSync(fs *flag.FlagSet, args []string) error {
	fullpaths := fs.Bool("fullpaths", false, "don't basename arguments")
	if err := fs.Parse(args); err != nil {
		return err
	}
	if fs.NArg() < 2 {
		return errWrongUsage
	}
	if err := openDB(fs.Arg(0)); err != nil {
		return err
	}

	roots, err := syncResolveRoots(fs.Args()[1:], *fullpaths)
	if err != nil {
		return err
	}

	// Interruption is handled through context cancellation.
	ctx, stop := signal.NotifyContext(context.Background(), os.Interrupt)
	defer stop()

	// In case of a failure during processing, the only retained side effects
	// on the filesystem tree are:
	//  - Fixing dead symlinks to images.
	//  - Creating symlinks to images that aren't used by anything.
	tx, err := db.BeginTx(ctx, nil)
	if err != nil {
		return err
	}
	defer tx.Rollback()

	// Mild hack: upgrade the transaction to a write one straight away,
	// in order to rule out deadlocks (preventable failure).
	if _, err := tx.Exec(`END TRANSACTION;
		BEGIN IMMEDIATE TRANSACTION`); err != nil {
		return err
	}

	c := syncContext{ctx: ctx, tx: tx, pb: newProgressBar(-1),
		linked: make(map[string]struct{})}
	defer c.pb.Stop()

	// Prepare the three disposal statements, building on disposeCTE.
	if c.stmtOrphan, err = c.tx.Prepare(disposeCTE + `
		INSERT OR IGNORE INTO orphan(sha1, path)
		SELECT sha1, path FROM orphaned`); err != nil {
		return err
	}
	if c.stmtDisposeSub, err = c.tx.Prepare(disposeCTE + `
		DELETE FROM node WHERE id
		IN (SELECT DISTINCT id FROM children WHERE level <> 1)`); err != nil {
		return err
	}
	if c.stmtDisposeAll, err = c.tx.Prepare(disposeCTE + `
		DELETE FROM node WHERE id
		IN (SELECT DISTINCT id FROM children)`); err != nil {
		return err
	}

	// Info tasks take a position in the task semaphore channel,
	// then fill the info channel.
	//
	// Immediately after syncDequeue(), the info channel is empty,
	// but the semaphore might be full.
	//
	// By having at least one position in the info channel,
	// we allow at least one info task to run to semaphore release,
	// so that syncEnqueue() doesn't deadlock.
	//
	// By making it the same size as the semaphore,
	// the end of this function doesn't need to dequeue while waiting.
	// It also prevents goroutine leaks despite leaving them running--
	// once they finish their job, they're gone,
	// and eventually the info channel would get garbage collected.
	//
	// The additional slot is there to handle the one result
	// that may be placed while syncEnqueue() waits for the semaphore,
	// i.e., it is for the result of the task that syncEnqueue() spawns.
	c.info = make(chan syncFileInfo, cap(taskSemaphore)+1)

	for _, root := range roots {
		if err := syncRoot(&c, root.db, root.fs); err != nil {
			return err
		}
	}
	return tx.Commit()
}
+
+// --- Removal -----------------------------------------------------------------
+
// cmdRemove is for manual removal of subtrees from the database.
// Beware that inputs are database, not filesystem paths.
// Images losing their last reference become orphan records.
func cmdRemove(fs *flag.FlagSet, args []string) error {
	if err := fs.Parse(args); err != nil {
		return err
	}
	if fs.NArg() < 2 {
		return errWrongUsage
	}
	if err := openDB(fs.Arg(0)); err != nil {
		return err
	}

	tx, err := db.BeginTx(context.Background(), nil)
	if err != nil {
		return err
	}
	defer tx.Rollback()

	for _, path := range fs.Args()[1:] {
		// Walk the path from the root; the initial invalid NullInt64
		// matches "parent IS NULL", i.e., the top level.
		var id sql.NullInt64
		for _, name := range decodeWebPath(path) {
			if err := tx.QueryRow(`SELECT id FROM node
				WHERE parent IS ? AND name = ?`,
				id, name).Scan(&id); err != nil {
				return err
			}
		}
		// The ID can only remain zero when the path was empty.
		if id.Int64 == 0 {
			return errors.New("can't remove root")
		}

		if _, err = tx.Exec(disposeCTE+`
			INSERT OR IGNORE INTO orphan(sha1, path)
			SELECT sha1, path FROM orphaned`, id); err != nil {
			return err
		}
		if _, err = tx.Exec(disposeCTE+`
			DELETE FROM node WHERE id
			IN (SELECT DISTINCT id FROM children)`, id); err != nil {
			return err
		}
	}
	return tx.Commit()
}
+
+// --- Tagging -----------------------------------------------------------------
+
+// cmdTag mass imports tags from data passed on stdin as a TSV
+// of SHA1 TAG WEIGHT entries.
+func cmdTag(fs *flag.FlagSet, args []string) error {
+ if err := fs.Parse(args); err != nil {
+ return err
+ }
+ if fs.NArg() < 2 || fs.NArg() > 3 {
+ return errWrongUsage
+ }
+ if err := openDB(fs.Arg(0)); err != nil {
+ return err
+ }
+
+ space := fs.Arg(1)
+
+ var description sql.NullString
+ if fs.NArg() >= 3 {
+ description = sql.NullString{String: fs.Arg(2), Valid: true}
+ }
+
+ // Note that starting as a write transaction prevents deadlocks.
+ // Imports are rare, and just bulk load data, so this scope is fine.
+ tx, err := db.Begin()
+ if err != nil {
+ return err
+ }
+ defer tx.Rollback()
+
+ if _, err := tx.Exec(`INSERT OR IGNORE INTO tag_space(name, description)
+ VALUES (?, ?)`, space, description); err != nil {
+ return err
+ }
+
+ var spaceID int64
+ if err := tx.QueryRow(`SELECT id FROM tag_space WHERE name = ?`,
+ space).Scan(&spaceID); err != nil {
+ return err
+ }
+
+ // XXX: It might make sense to pre-erase all tag assignments within
+ // the given space for that image, the first time we see it:
+ //
+ // DELETE FROM tag_assignment
+ // WHERE sha1 = ? AND tag IN (SELECT id FROM tag WHERE space = ?)
+ //
+ // or even just clear the tag space completely:
+ //
+ // DELETE FROM tag_assignment
+ // WHERE tag IN (SELECT id FROM tag WHERE space = ?);
+ // DELETE FROM tag WHERE space = ?;
+ stmt, err := tx.Prepare(`INSERT INTO tag_assignment(sha1, tag, weight)
+ VALUES (?, (SELECT id FROM tag WHERE space = ? AND name = ?), ?)
+ ON CONFLICT DO UPDATE SET weight = ?`)
+ if err != nil {
+ return err
+ }
+
+ scanner := bufio.NewScanner(os.Stdin)
+ for scanner.Scan() {
+ fields := strings.Split(scanner.Text(), "\t")
+ if len(fields) != 3 {
+ return errors.New("invalid input format")
+ }
+
+ sha1, tag := fields[0], fields[1]
+ weight, err := strconv.ParseFloat(fields[2], 64)
+ if err != nil {
+ return err
+ }
+
+ if _, err := tx.Exec(
+ `INSERT OR IGNORE INTO tag(space, name) VALUES (?, ?);`,
+ spaceID, tag); err != nil {
+ return nil
+ }
+ if _, err := stmt.Exec(sha1, spaceID, tag, weight, weight); err != nil {
+ log.Printf("%s: %s\n", sha1, err)
+ }
+ }
+ if err := scanner.Err(); err != nil {
+ return err
+ }
+ return tx.Commit()
+}
+
+// --- Check -------------------------------------------------------------------
+
+func isValidSHA1(hash string) bool {
+ if len(hash) != sha1.Size*2 || strings.ToLower(hash) != hash {
+ return false
+ }
+ if _, err := hex.DecodeString(hash); err != nil {
+ return false
+ }
+ return true
+}
+
+func hashesToFileListing(root, suffix string, hashes []string) []string {
+ // Note that we're semi-duplicating {image,thumb}Path().
+ paths := []string{root}
+ for _, hash := range hashes {
+ dir := filepath.Join(root, hash[:2])
+ paths = append(paths, dir, filepath.Join(dir, hash+suffix))
+ }
+ slices.Sort(paths)
+ return slices.Compact(paths)
+}
+
+func collectFileListing(root string) (paths []string, err error) {
+ err = filepath.WalkDir(root,
+ func(path string, d fs.DirEntry, err error) error {
+ paths = append(paths, path)
+ return err
+ })
+
+ // Even though it should already be sorted somehow.
+ slices.Sort(paths)
+ return
+}
+
+func checkFiles(root, suffix string, hashes []string) (bool, []string, error) {
+ db := hashesToFileListing(root, suffix, hashes)
+ fs, err := collectFileListing(root)
+ if err != nil {
+ return false, nil, err
+ }
+
+ iDB, iFS, ok, intersection := 0, 0, true, []string{}
+ for iDB < len(db) && iFS < len(fs) {
+ if db[iDB] == fs[iFS] {
+ intersection = append(intersection, db[iDB])
+ iDB++
+ iFS++
+ } else if db[iDB] < fs[iFS] {
+ ok = false
+ fmt.Printf("only in DB: %s\n", db[iDB])
+ iDB++
+ } else {
+ ok = false
+ fmt.Printf("only in FS: %s\n", fs[iFS])
+ iFS++
+ }
+ }
+ for _, path := range db[iDB:] {
+ ok = false
+ fmt.Printf("only in DB: %s\n", path)
+ }
+ for _, path := range fs[iFS:] {
+ ok = false
+ fmt.Printf("only in FS: %s\n", path)
+ }
+ return ok, intersection, nil
+}
+
+func checkHash(path string) (message string, err error) {
+ f, err := os.Open(path)
+ if err != nil {
+ return err.Error(), nil
+ }
+ defer f.Close()
+
+ // We get 2 levels of parent directories in here, just filter them out.
+ if fi, err := f.Stat(); err != nil {
+ return err.Error(), nil
+ } else if fi.IsDir() {
+ return "", nil
+ }
+
+ hash := sha1.New()
+ _, err = io.CopyBuffer(hash, f, make([]byte, 65536))
+ if err != nil {
+ return err.Error(), nil
+ }
+
+ sha1 := hex.EncodeToString(hash.Sum(nil))
+ if sha1 != filepath.Base(path) {
+ return fmt.Sprintf("mismatch, found %s", sha1), nil
+ }
+ return "", nil
+}
+
+func checkHashes(paths []string) (bool, error) {
+ log.Println("checking image hashes")
+ var failed atomic.Bool
+ err := parallelize(paths, func(path string) (string, error) {
+ message, err := checkHash(path)
+ if message != "" {
+ failed.Store(true)
+ }
+ return message, err
+ })
+ return !failed.Load(), err
+}
+
// cmdCheck carries out various database consistency checks:
// hash format validity, database/filesystem listing agreement,
// dead symlinks, and--with -full--image content against the hashes.
func cmdCheck(fs *flag.FlagSet, args []string) error {
	full := fs.Bool("full", false, "verify image hashes")
	if err := fs.Parse(args); err != nil {
		return err
	}
	if fs.NArg() != 1 {
		return errWrongUsage
	}
	if err := openDB(fs.Arg(0)); err != nil {
		return err
	}

	// Check if hashes are in the right format.
	// NOTE(review): this log message matches checkHashes' even though
	// this pass only validates the format, not the content.
	log.Println("checking image hashes")

	allSHA1, err := dbCollectStrings(`SELECT sha1 FROM image`)
	if err != nil {
		return err
	}

	ok := true
	for _, hash := range allSHA1 {
		if !isValidSHA1(hash) {
			ok = false
			fmt.Printf("invalid image SHA1: %s\n", hash)
		}
	}

	// This is, rather obviously, just a strict subset.
	// Although it doesn't run in the same transaction.
	thumbSHA1, err := dbCollectStrings(`SELECT sha1 FROM image
		WHERE thumbw IS NOT NULL OR thumbh IS NOT NULL`)
	if err != nil {
		return err
	}

	// This somewhat duplicates {image,thumb}Path().
	log.Println("checking SQL against filesystem")
	okImages, intersection, err := checkFiles(
		filepath.Join(galleryDirectory, nameOfImageRoot), "", allSHA1)
	if err != nil {
		return err
	}

	okThumbs, _, err := checkFiles(
		filepath.Join(galleryDirectory, nameOfThumbRoot), ".webp", thumbSHA1)
	if err != nil {
		return err
	}
	if !okImages || !okThumbs {
		ok = false
	}

	// Image files are created as symlinks (see syncImageSave),
	// whose targets may have since disappeared.
	log.Println("checking for dead symlinks")
	for _, path := range intersection {
		if _, err := os.Stat(path); err != nil {
			ok = false
			fmt.Printf("%s: %s\n", path, err)
		}
	}

	if *full {
		if ok2, err := checkHashes(intersection); err != nil {
			return err
		} else if !ok2 {
			ok = false
		}
	}

	if !ok {
		return errors.New("detected inconsistencies")
	}
	return nil
}
+
+// --- Thumbnailing ------------------------------------------------------------
+
+func identifyThumbnail(path string) (w, h int, err error) {
+ f, err := os.Open(path)
+ if err != nil {
+ return
+ }
+ defer f.Close()
+
+ config, err := webp.DecodeConfig(f)
+ if err != nil {
+ return
+ }
+ return config.Width, config.Height, nil
+}
+
+func makeThumbnail(load bool, pathImage, pathThumb string) (
+ w, h int, err error) {
+ if load {
+ if w, h, err = identifyThumbnail(pathThumb); err == nil {
+ return
+ }
+ }
+
+ thumbDirname, _ := filepath.Split(pathThumb)
+ if err := os.MkdirAll(thumbDirname, 0755); err != nil {
+ return 0, 0, err
+ }
+
+ // Create a normalized thumbnail. Since we don't particularly need
+ // any complex processing, such as surrounding of metadata,
+ // simply push it through ImageMagick.
+ //
+ // - http://www.ericbrasseur.org/gamma.html
+ // - https://www.imagemagick.org/Usage/thumbnails/
+ // - https://imagemagick.org/script/command-line-options.php#layers
+ //
+ // "info:" output is written for each frame, which is why we delete
+ // all of them but the first one beforehands.
+ //
+ // TODO: See if we can optimize resulting WebP animations.
+ // (Do -layers optimize* apply to this format at all?)
+ cmd := exec.Command("magick", "-limit", "thread", "1", pathImage,
+ "-coalesce", "-colorspace", "RGB", "-auto-orient", "-strip",
+ "-resize", "256x128>", "-colorspace", "sRGB",
+ "-format", "%w %h", "+write", pathThumb, "-delete", "1--1", "info:")
+
+ out, err := cmd.Output()
+ if err != nil {
+ return 0, 0, err
+ }
+
+ _, err = fmt.Fscanf(bytes.NewReader(out), "%d %d", &w, &h)
+ return w, h, err
+}
+
// cmdThumbnail generates missing thumbnails, in parallel.
// Particular images may be named by their SHA-1 on the command line;
// the default is all images lacking thumbnail dimensions in the database.
func cmdThumbnail(fs *flag.FlagSet, args []string) error {
	load := fs.Bool("load", false, "try to load existing thumbnail files")
	if err := fs.Parse(args); err != nil {
		return err
	}
	if fs.NArg() < 1 {
		return errWrongUsage
	}
	if err := openDB(fs.Arg(0)); err != nil {
		return err
	}

	hexSHA1 := fs.Args()[1:]
	if len(hexSHA1) == 0 {
		// Get all unique images in the database with no thumbnail.
		var err error
		hexSHA1, err = dbCollectStrings(`SELECT sha1 FROM image
			WHERE thumbw IS NULL OR thumbh IS NULL`)
		if err != nil {
			return err
		}
	}

	stmt, err := db.Prepare(
		`UPDATE image SET thumbw = ?, thumbh = ? WHERE sha1 = ?`)
	if err != nil {
		return err
	}
	defer stmt.Close()

	// NOTE(review): the mutex serializes database updates;
	// presumably to sidestep SQLITE_BUSY contention--confirm.
	var mu sync.Mutex
	return parallelize(hexSHA1, func(sha1 string) (message string, err error) {
		pathImage := imagePath(sha1)
		pathThumb := thumbPath(sha1)
		w, h, err := makeThumbnail(*load, pathImage, pathThumb)
		if err != nil {
			// ImageMagick failures become warning messages only.
			if ee, ok := err.(*exec.ExitError); ok {
				return string(ee.Stderr), nil
			}
			return "", err
		}

		mu.Lock()
		defer mu.Unlock()
		_, err = stmt.Exec(w, h, sha1)
		return "", err
	})
}
+
+// --- Perceptual hash ---------------------------------------------------------
+
// linearImage wraps an image so that pixel reads yield linear RGB
// rather than sRGB values--see the At method.
type linearImage struct {
	img image.Image
}
+
// newLinearImage wraps the given image in a linearizing adapter.
func newLinearImage(img image.Image) *linearImage {
	return &linearImage{img: img}
}
+
// ColorModel and Bounds delegate to the wrapped image.
func (l *linearImage) ColorModel() color.Model { return l.img.ColorModel() }
func (l *linearImage) Bounds() image.Rectangle { return l.img.Bounds() }
+
// unSRGB converts a 16-bit sRGB sample to an 8-bit linear intensity,
// inverting the sRGB transfer function.
func unSRGB(c uint32) uint8 {
	normalized := float64(c) / 0xffff
	if normalized > 0.04045 {
		return uint8(math.Pow((normalized+0.055)/(1.055), 2.4) * 255.0)
	}
	return uint8(normalized * (255.0 / 12.92))
}
+
// At reads the wrapped image's pixel at (x, y), converting each
// colour channel from sRGB to linear, and truncating alpha to 8 bits.
func (l *linearImage) At(x, y int) color.Color {
	r, g, b, a := l.img.At(x, y).RGBA()
	return color.RGBA{
		R: unSRGB(r), G: unSRGB(g), B: unSRGB(b), A: uint8(a >> 8)}
}
+
+// - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
+
+// isWebPAnimation returns whether the given ReadSeeker starts a WebP animation.
+// See https://developers.google.com/speed/webp/docs/riff_container
+func isWebPAnimation(rs io.ReadSeeker) (bool, error) {
+ b := make([]byte, 21)
+ if _, err := rs.Read(b); err != nil {
+ return false, err
+ }
+ if _, err := rs.Seek(0, io.SeekStart); err != nil {
+ return false, err
+ }
+
+ return bytes.Equal(b[:4], []byte("RIFF")) &&
+ bytes.Equal(b[8:16], []byte("WEBPVP8X")) &&
+ b[20]&0b00000010 != 0, nil
+}
+
// errIsAnimation is returned by dhashWebP for animations, which it
// refuses to hash; cmdDhash silently skips over this condition.
var errIsAnimation = errors.New("cannot perceptually hash animations")
+
+func dhashWebP(rs io.ReadSeeker) (uint64, error) {
+ if a, err := isWebPAnimation(rs); err != nil {
+ return 0, err
+ } else if a {
+ return 0, errIsAnimation
+ }
+
+ // Doing this entire thing in Go is SLOW, but convenient.
+ source, err := webp.Decode(rs)
+ if err != nil {
+ return 0, err
+ }
+
+ var (
+ linear = newLinearImage(source)
+ resized = image.NewNRGBA64(image.Rect(0, 0, 9, 8))
+ )
+ draw.CatmullRom.Scale(resized, resized.Bounds(),
+ linear, linear.Bounds(), draw.Src, nil)
+
+ var hash uint64
+ for y := 0; y < 8; y++ {
+ var grey [9]float32
+ for x := 0; x < 9; x++ {
+ rgba := resized.NRGBA64At(x, y)
+ grey[x] = 0.2126*float32(rgba.R) +
+ 0.7152*float32(rgba.G) +
+ 0.0722*float32(rgba.B)
+ }
+
+ var row uint64
+ if grey[0] < grey[1] {
+ row |= 1 << 7
+ }
+ if grey[1] < grey[2] {
+ row |= 1 << 6
+ }
+ if grey[2] < grey[3] {
+ row |= 1 << 5
+ }
+ if grey[3] < grey[4] {
+ row |= 1 << 4
+ }
+ if grey[4] < grey[5] {
+ row |= 1 << 3
+ }
+ if grey[5] < grey[6] {
+ row |= 1 << 2
+ }
+ if grey[6] < grey[7] {
+ row |= 1 << 1
+ }
+ if grey[7] < grey[8] {
+ row |= 1 << 0
+ }
+ hash = hash<<8 | row
+ }
+ return hash, nil
+}
+
+// - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
+
+func makeDhash(sha1 string) (uint64, error) {
+ pathThumb := thumbPath(sha1)
+ f, err := os.Open(pathThumb)
+ if err != nil {
+ return 0, err
+ }
+ defer f.Close()
+ return dhashWebP(f)
+}
+
// cmdDhash computes perceptual hashes from thumbnails.
// Particular images may be named by their SHA-1 on the command line;
// the default is all thumbnailed images lacking a dhash in the database.
func cmdDhash(fs *flag.FlagSet, args []string) error {
	if err := fs.Parse(args); err != nil {
		return err
	}
	if fs.NArg() < 1 {
		return errWrongUsage
	}
	if err := openDB(fs.Arg(0)); err != nil {
		return err
	}

	hexSHA1 := fs.Args()[1:]
	if len(hexSHA1) == 0 {
		var err error
		hexSHA1, err = dbCollectStrings(`SELECT sha1 FROM image
			WHERE thumbw IS NOT NULL AND thumbh IS NOT NULL AND dhash IS NULL`)
		if err != nil {
			return err
		}
	}

	stmt, err := db.Prepare(`UPDATE image SET dhash = ? WHERE sha1 = ?`)
	if err != nil {
		return err
	}
	defer stmt.Close()

	// NOTE(review): the mutex serializes database updates;
	// presumably to sidestep SQLITE_BUSY contention--confirm.
	var mu sync.Mutex
	return parallelize(hexSHA1, func(sha1 string) (message string, err error) {
		hash, err := makeDhash(sha1)
		if errors.Is(err, errIsAnimation) {
			// Ignoring this common condition.
			return "", nil
		} else if err != nil {
			return err.Error(), nil
		}

		mu.Lock()
		defer mu.Unlock()
		// The signed conversion keeps the value within SQLite's INTEGER.
		_, err = stmt.Exec(int64(hash), sha1)
		return "", err
	})
}
+
+// --- Main --------------------------------------------------------------------
+
// errWrongUsage is returned by command handlers to make main
// print the command's usage and exit with status 2.
var errWrongUsage = errors.New("wrong usage")
+
// commands maps subcommand names to their handlers,
// together with a usage synopsis and a one-line description.
// "GD" stands for the gallery directory argument.
var commands = map[string]struct {
	handler  func(*flag.FlagSet, []string) error
	usage    string
	function string
}{
	"init":      {cmdInit, "GD", "Initialize a database."},
	"web":       {cmdWeb, "GD ADDRESS", "Launch a web interface."},
	"tag":       {cmdTag, "GD SPACE [DESCRIPTION]", "Import tags."},
	"sync":      {cmdSync, "GD ROOT...", "Synchronise with the filesystem."},
	"remove":    {cmdRemove, "GD PATH...", "Remove database subtrees."},
	"check":     {cmdCheck, "GD", "Run consistency checks."},
	"thumbnail": {cmdThumbnail, "GD [SHA1...]", "Generate thumbnails."},
	"dhash":     {cmdDhash, "GD [SHA1...]", "Compute perceptual hashes."},
}
+
+func usage() {
+ f := flag.CommandLine.Output()
+ fmt.Fprintf(f, "Usage: %s COMMAND [ARG...]\n", os.Args[0])
+ flag.PrintDefaults()
+
+ // The alphabetic ordering is unfortunate, but tolerable.
+ keys := []string{}
+ for key := range commands {
+ keys = append(keys, key)
+ }
+ sort.Strings(keys)
+
+ fmt.Fprintf(f, "\nCommands:\n")
+ for _, key := range keys {
+ fmt.Fprintf(f, " %s [OPTION...] %s\n \t%s\n",
+ key, commands[key].usage, commands[key].function)
+ }
+}
+
// main dispatches to the subcommand named by the first argument.
func main() {
	// This implements the -h switch for us by default.
	// The rest of the handling here closely follows what flag does internally.
	flag.Usage = usage
	flag.Parse()
	if flag.NArg() < 1 {
		flag.Usage()
		os.Exit(2)
	}

	cmd, ok := commands[flag.Arg(0)]
	if !ok {
		fmt.Fprintf(flag.CommandLine.Output(),
			"unknown command: %s\n", flag.Arg(0))
		flag.Usage()
		os.Exit(2)
	}

	// Each command parses its own flags, with its own usage text.
	fs := flag.NewFlagSet(flag.Arg(0), flag.ExitOnError)
	fs.Usage = func() {
		fmt.Fprintf(fs.Output(),
			"Usage: %s [OPTION...] %s\n%s\n",
			fs.Name(), cmd.usage, cmd.function)
		fs.PrintDefaults()
	}

	// Limit parallel computations to the number of CPUs.
	taskSemaphore = newSemaphore(runtime.NumCPU())
	err := cmd.handler(fs, flag.Args()[1:])

	// Note that the database object has a closing finalizer,
	// we just additionally print any errors coming from there.
	if db != nil {
		if err := db.Close(); err != nil {
			log.Println(err)
		}
	}

	if errors.Is(err, errWrongUsage) {
		fs.Usage()
		os.Exit(2)
	} else if err != nil {
		log.Fatalln(err)
	}
}
--
cgit v1.2.3-70-g09d2