aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorPřemysl Eric Janouch <p@janouch.name>2023-12-08 02:16:04 +0100
committerPřemysl Eric Janouch <p@janouch.name>2023-12-29 15:02:28 +0100
commit054078908a1e4c7429ea0f5a3a0605addfccc46c (patch)
tree7a6dd29cd4381bd655fa78f5866f25c552d05072
downloadgallery-054078908a1e4c7429ea0f5a3a0605addfccc46c.tar.gz
gallery-054078908a1e4c7429ea0f5a3a0605addfccc46c.tar.xz
gallery-054078908a1e4c7429ea0f5a3a0605addfccc46c.zip
Initial commit
-rw-r--r--LICENSE12
-rw-r--r--Makefile14
-rw-r--r--README14
-rwxr-xr-xgen-initialize.sh6
-rw-r--r--go.mod8
-rw-r--r--go.sum4
-rw-r--r--initialize.sql105
-rw-r--r--main.go2497
-rw-r--r--public/gallery.js675
-rw-r--r--public/style.css102
-rwxr-xr-xtest.sh65
11 files changed, 3502 insertions, 0 deletions
diff --git a/LICENSE b/LICENSE
new file mode 100644
index 0000000..7d13ecd
--- /dev/null
+++ b/LICENSE
@@ -0,0 +1,12 @@
+Copyright (c) 2023, Přemysl Eric Janouch <p@janouch.name>
+
+Permission to use, copy, modify, and/or distribute this software for any
+purpose with or without fee is hereby granted.
+
+THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
+WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
+MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY
+SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
+WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN ACTION
+OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN
+CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
diff --git a/Makefile b/Makefile
new file mode 100644
index 0000000..fe30c13
--- /dev/null
+++ b/Makefile
@@ -0,0 +1,14 @@
.POSIX:
.SUFFIXES:

# Everything the build produces; "clean" removes exactly this list.
outputs = gallery initialize.go public/mithril.js
all: $(outputs)

# NOTE(review): -N -l disable optimizations and inlining for debugging;
# presumably intentional during early development -- confirm before release.
gallery: main.go initialize.go
	go build -tags "" -gcflags="all=-N -l" -o $@
# Embed initialize.sql in a Go source file as a string constant.
initialize.go: initialize.sql gen-initialize.sh
	./gen-initialize.sh initialize.sql > $@
# Vendor the Mithril frontend framework (requires network access).
public/mithril.js:
	curl -Lo $@ https://unpkg.com/mithril/mithril.js
clean:
	rm -f $(outputs)
diff --git a/README b/README
new file mode 100644
index 0000000..03a34fe
--- /dev/null
+++ b/README
@@ -0,0 +1,14 @@
+This is gallery software designed to maintain a shadow structure
+of your filesystem, in which you can attach metadata to your media,
+and query your collections in various ways.
+
+All media is content-addressed by its SHA-1 hash value, and at your option
+also perceptually hashed. Duplicate search is an essential feature.
+
+Prerequisites: Go, ImageMagick, xdg-utils
+
+The gallery is designed for simplicity, and easy interoperability.
+sqlite3, curl, jq, and the filesystem will take you a long way.
+
+The intended mode of use is running daily automated sync/thumbnail/dhash/tag
+batches in a cron job, or from a system timer. See test.sh for usage hints.
diff --git a/gen-initialize.sh b/gen-initialize.sh
new file mode 100755
index 0000000..8d8cb55
--- /dev/null
+++ b/gen-initialize.sh
@@ -0,0 +1,6 @@
+#!/bin/sh -e
+gofmt <<EOF
+package ${GOPACKAGE:-main}
+
+const initializeSQL = \`$(sed 's/`/` + "`" + `/g' "$@")\`
+EOF
diff --git a/go.mod b/go.mod
new file mode 100644
index 0000000..6aa6b75
--- /dev/null
+++ b/go.mod
@@ -0,0 +1,8 @@
+module janouch.name/gallery
+
+go 1.21.4
+
+require (
+ github.com/mattn/go-sqlite3 v1.14.19
+ golang.org/x/image v0.14.0
+)
diff --git a/go.sum b/go.sum
new file mode 100644
index 0000000..8562c23
--- /dev/null
+++ b/go.sum
@@ -0,0 +1,4 @@
+github.com/mattn/go-sqlite3 v1.14.19 h1:fhGleo2h1p8tVChob4I9HpmVFIAkKGpiukdrgQbWfGI=
+github.com/mattn/go-sqlite3 v1.14.19/go.mod h1:2eHXhiwb8IkHr+BDWZGa96P6+rkvnG63S2DGjv9HUNg=
+golang.org/x/image v0.14.0 h1:tNgSxAFe3jC4uYqvZdTr84SZoM1KfwdC9SKIFrLjFn4=
+golang.org/x/image v0.14.0/go.mod h1:HUYqC05R2ZcZ3ejNQsIHQDQiwWM4JBqmm6MKANTp4LE=
diff --git a/initialize.sql b/initialize.sql
new file mode 100644
index 0000000..5a54a7f
--- /dev/null
+++ b/initialize.sql
@@ -0,0 +1,105 @@
+CREATE TABLE IF NOT EXISTS image(
+ sha1 TEXT NOT NULL, -- SHA-1 hash of file in lowercase hexadecimal
+ width INTEGER NOT NULL, -- cached media width
+ height INTEGER NOT NULL, -- cached media height
+ thumbw INTEGER, -- cached thumbnail width, if known
+ thumbh INTEGER, -- cached thumbnail height, if known
+ dhash INTEGER, -- uint64 perceptual hash as a signed integer
+ CHECK (unhex(sha1) IS NOT NULL AND lower(sha1) = sha1),
+ PRIMARY KEY (sha1)
+) STRICT;
+
+CREATE INDEX IF NOT EXISTS image__dhash ON image(dhash);
+
+--
+
+CREATE TABLE IF NOT EXISTS node(
+ id INTEGER NOT NULL, -- unique ID
+ parent INTEGER REFERENCES node(id), -- root if NULL
+ name TEXT NOT NULL, -- path component
+ mtime INTEGER, -- files: Unix time in seconds
+ sha1 TEXT REFERENCES image(sha1), -- files: content hash
+ PRIMARY KEY (id)
+) STRICT;
+
+CREATE INDEX IF NOT EXISTS node__sha1 ON node(sha1);
+CREATE UNIQUE INDEX IF NOT EXISTS node__parent_name
+ON node(IFNULL(parent, 0), name);
+
+CREATE TRIGGER IF NOT EXISTS node__sha1__check
+BEFORE UPDATE OF sha1 ON node
+WHEN OLD.sha1 IS NULL AND NEW.sha1 IS NOT NULL
+AND EXISTS(SELECT id FROM node WHERE parent = OLD.id)
+BEGIN
+ SELECT RAISE(ABORT, 'trying to turn a non-empty directory into a file');
+END;
+
+/*
+Automatic garbage collection, not sure if it actually makes any sense.
+This needs PRAGMA recursive_triggers = 1; to work properly.
+
+CREATE TRIGGER IF NOT EXISTS node__parent__gc
+AFTER DELETE ON node FOR EACH ROW
+BEGIN
+ DELETE FROM node WHERE id = OLD.parent
+ AND id NOT IN (SELECT DISTINCT parent FROM node);
+END;
+*/
+
+--
+
-- Images that are no longer referenced from the node hierarchy;
-- "path" remembers where each was last seen, so that a replacement
-- image appearing there may be offered for metadata transfer.
CREATE TABLE IF NOT EXISTS orphan(
	sha1 TEXT NOT NULL REFERENCES image(sha1),
	path TEXT NOT NULL, -- last occurrence within the database hierarchy
	PRIMARY KEY (sha1)
) STRICT;
+
+-- Renaming/moving a file can result either in a (ref, unref) or a (unref, ref)
+-- sequence during sync, and I want to get at the same result.
+CREATE TRIGGER IF NOT EXISTS node__sha1__deorphan_insert
+AFTER INSERT ON node
+WHEN NEW.sha1 IS NOT NULL
+BEGIN
+ DELETE FROM orphan WHERE sha1 = NEW.sha1;
+END;
+
+CREATE TRIGGER IF NOT EXISTS node__sha1__deorphan_update
+AFTER UPDATE OF sha1 ON node
+WHEN NEW.sha1 IS NOT NULL
+BEGIN
+ DELETE FROM orphan WHERE sha1 = NEW.sha1;
+END;
+
+--
+
+CREATE TABLE IF NOT EXISTS tag_space(
+ id INTEGER NOT NULL,
+ name TEXT NOT NULL,
+ description TEXT,
+ CHECK (name NOT LIKE '%:%'),
+ PRIMARY KEY (id)
+) STRICT;
+
+CREATE UNIQUE INDEX IF NOT EXISTS tag_space__name ON tag_space(name);
+
+-- To avoid having to deal with NULLs, always create this special tag space.
+INSERT OR IGNORE INTO tag_space(id, name, description)
+VALUES(0, '', 'User-defined tags');
+
+CREATE TABLE IF NOT EXISTS tag(
+ id INTEGER NOT NULL,
+ space INTEGER NOT NULL REFERENCES tag_space(id),
+ name TEXT NOT NULL,
+ PRIMARY KEY (id)
+) STRICT;
+
+CREATE UNIQUE INDEX IF NOT EXISTS tag__space_name ON tag(space, name);
+
+CREATE TABLE IF NOT EXISTS tag_assignment(
+ sha1 TEXT NOT NULL REFERENCES image(sha1),
+ tag INTEGER NOT NULL REFERENCES tag(id),
+ weight REAL NOT NULL, -- 0..1 normalized weight assigned to tag
+ PRIMARY KEY (sha1, tag)
+) STRICT;
+
+CREATE INDEX IF NOT EXISTS tag_assignment__tag ON tag_assignment(tag);
diff --git a/main.go b/main.go
new file mode 100644
index 0000000..95bb502
--- /dev/null
+++ b/main.go
@@ -0,0 +1,2497 @@
+package main
+
+import (
+ "bufio"
+ "bytes"
+ "context"
+ "crypto/sha1"
+ "database/sql"
+ "encoding/hex"
+ "encoding/json"
+ "errors"
+ "flag"
+ "fmt"
+ "html/template"
+ "image"
+ "image/color"
+ "io"
+ "io/fs"
+ "log"
+ "math"
+ "math/bits"
+ "net"
+ "net/http"
+ "os"
+ "os/exec"
+ "os/signal"
+ "path/filepath"
+ "regexp"
+ "runtime"
+ "slices"
+ "sort"
+ "strconv"
+ "strings"
+ "sync"
+ "sync/atomic"
+ "syscall"
+ "time"
+
+ "github.com/mattn/go-sqlite3"
+ "golang.org/x/image/draw"
+ "golang.org/x/image/webp"
+)
+
+var (
+ db *sql.DB // sqlite database
+ galleryDirectory string // gallery directory
+
+ // taskSemaphore limits parallel computations.
+ taskSemaphore semaphore
+)
+
+const (
+ nameOfDB = "gallery.db"
+ nameOfImageRoot = "images"
+ nameOfThumbRoot = "thumbs"
+)
+
// hammingDistance returns the number of differing bits between two uint64
// perceptual hashes, which SQLite stores as signed 64-bit integers.
func hammingDistance(a, b int64) int {
	diff := uint64(a) ^ uint64(b)
	return bits.OnesCount64(diff)
}
+
+func init() {
+ sql.Register("sqlite3_custom", &sqlite3.SQLiteDriver{
+ ConnectHook: func(conn *sqlite3.SQLiteConn) error {
+ return conn.RegisterFunc("hamming", hammingDistance, true /*pure*/)
+ },
+ })
+}
+
// openDB opens the gallery's SQLite database inside the given directory,
// storing both the handle and the directory in globals for the whole run.
// Foreign keys are enforced, and a 1 s busy timeout guards concurrent use.
//
// NOTE(review): the DSN query string is passed through filepath.Join along
// with the filename; this works with POSIX separators -- verify on Windows.
func openDB(directory string) error {
	var err error
	db, err = sql.Open("sqlite3_custom", "file:"+filepath.Join(directory,
		nameOfDB+"?_foreign_keys=1&_busy_timeout=1000"))
	galleryDirectory = directory
	return err
}
+
+func imagePath(sha1 string) string {
+ return filepath.Join(galleryDirectory,
+ nameOfImageRoot, sha1[:2], sha1)
+}
+
+func thumbPath(sha1 string) string {
+ return filepath.Join(galleryDirectory,
+ nameOfThumbRoot, sha1[:2], sha1+".webp")
+}
+
// dbCollectStrings runs a query expected to produce a single text column
// against the global database, and collects all rows into a slice,
// which is never nil.
func dbCollectStrings(query string, a ...any) ([]string, error) {
	rows, err := db.Query(query, a...)
	if err != nil {
		return nil, err
	}
	defer rows.Close()

	result := []string{}
	for rows.Next() {
		var s string
		if err := rows.Scan(&s); err != nil {
			return nil, err
		}
		result = append(result, s)
	}
	// Iteration may have terminated on an error rather than exhaustion.
	if err := rows.Err(); err != nil {
		return nil, err
	}
	return result, nil
}
+
+// - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
+
// idForDirectoryPath resolves a directory path, given as a slice of names,
// to its node ID by walking down from the root.  With create set, missing
// directories are inserted along the way.  An empty path resolves to zero,
// which stands in for the root.  When the path cannot be found and create
// is false, the sql.ErrNoRows from the lookup is propagated to the caller.
func idForDirectoryPath(tx *sql.Tx, path []string, create bool) (int64, error) {
	var parent sql.NullInt64
	for _, name := range path {
		// Directories are nodes without a content hash;
		// the root's children have a NULL parent.
		if err := tx.QueryRow(`SELECT id FROM node
			WHERE parent IS ? AND name = ? AND sha1 IS NULL`,
			parent, name).Scan(&parent); err == nil {
			continue
		} else if !errors.Is(err, sql.ErrNoRows) {
			return 0, err
		} else if !create {
			return 0, err
		}

		// This fails when trying to override a leaf node.
		// That needs special handling.
		if result, err := tx.Exec(
			`INSERT INTO node(parent, name) VALUES (?, ?)`,
			parent, name); err != nil {
			return 0, err
		} else if id, err := result.LastInsertId(); err != nil {
			return 0, err
		} else {
			parent = sql.NullInt64{Int64: id, Valid: true}
		}
	}
	return parent.Int64, nil
}
+
+func decodeWebPath(path string) []string {
+ // Relative paths could be handled differently,
+ // but right now, they're assumed to start at the root.
+ result := []string{}
+ for _, crumb := range strings.Split(path, "/") {
+ if crumb != "" {
+ result = append(result, crumb)
+ }
+ }
+ return result
+}
+
+// --- Semaphore ---------------------------------------------------------------
+
// semaphore is a counting semaphore built on a buffered channel:
// sending acquires a slot, receiving releases one.
type semaphore chan struct{}

// newSemaphore creates a semaphore admitting size concurrent holders.
func newSemaphore(size int) semaphore { return make(chan struct{}, size) }

// release frees one slot; it must pair with a successful acquire.
func (s semaphore) release() { <-s }

// acquire blocks until a slot is free or ctx is cancelled,
// in which case it returns the context's error without holding a slot.
func (s semaphore) acquire(ctx context.Context) error {
	select {
	case <-ctx.Done():
		return ctx.Err()
	case s <- struct{}{}:
	}

	// Give priority to context cancellation.
	select {
	case <-ctx.Done():
		s.release()
		return ctx.Err()
	default:
	}
	return nil
}
+
+// --- Progress bar ------------------------------------------------------------
+
// progressBar is a rudimentary in-place terminal counter of completed work,
// rendered as "current/target (percentage)".  The embedded mutex serializes
// both state changes and terminal output.
type progressBar struct {
	sync.Mutex
	current int // number of finished items
	target  int // total number of items; negative means unknown
}

// newProgressBar creates a bar for target items and draws its initial state.
func newProgressBar(target int) *progressBar {
	pb := &progressBar{current: 0, target: target}
	pb.Update()
	return pb
}

// Stop finishes the in-place rendering by moving the cursor off the bar.
func (pb *progressBar) Stop() {
	// The minimum thing that works: just print a newline.
	os.Stdout.WriteString("\n")
}

// Update redraws the bar; callers must hold the mutex
// (or be the sole user, as during construction).
func (pb *progressBar) Update() {
	if pb.target < 0 {
		fmt.Printf("\r%d/?", pb.current)
		return
	}

	var fraction int
	if pb.target != 0 {
		fraction = int(float32(pb.current) / float32(pb.target) * 100)
	}

	// Right-align the current count to the target's width,
	// so the rendered line keeps a stable length.
	target := fmt.Sprintf("%d", pb.target)
	fmt.Printf("\r%*d/%s (%2d%%)", len(target), pb.current, target, fraction)
}

// Step advances the bar by one finished item and redraws it.
func (pb *progressBar) Step() {
	pb.Lock()
	defer pb.Unlock()

	pb.current++
	pb.Update()
}

// Interrupt suspends the bar so that callback may write to the terminal,
// then redraws the bar underneath whatever the callback printed.
func (pb *progressBar) Interrupt(callback func()) {
	pb.Lock()
	defer pb.Unlock()
	pb.Stop()
	defer pb.Update()

	callback()
}
+
+// --- Parallelization ---------------------------------------------------------
+
+type parallelFunc func(item string) (message string, err error)
+
+// parallelize runs the callback in parallel on a list of strings,
+// reporting progress and any non-fatal messages.
+func parallelize(strings []string, callback parallelFunc) error {
+ pb := newProgressBar(len(strings))
+ defer pb.Stop()
+
+ ctx, cancel := context.WithCancelCause(context.Background())
+ wg := sync.WaitGroup{}
+ for _, item := range strings {
+ if taskSemaphore.acquire(ctx) != nil {
+ break
+ }
+
+ wg.Add(1)
+ go func(item string) {
+ defer taskSemaphore.release()
+ defer wg.Done()
+ if message, err := callback(item); err != nil {
+ cancel(err)
+ } else if message != "" {
+ pb.Interrupt(func() { log.Printf("%s: %s\n", item, message) })
+ }
+ pb.Step()
+ }(item)
+ }
+ wg.Wait()
+ if ctx.Err() != nil {
+ return context.Cause(ctx)
+ }
+ return nil
+}
+
+// --- Initialization ----------------------------------------------------------
+
+// cmdInit initializes a "gallery directory" that contains gallery.sqlite,
+// images, thumbs.
+func cmdInit(fs *flag.FlagSet, args []string) error {
+ if err := fs.Parse(args); err != nil {
+ return err
+ }
+ if fs.NArg() != 1 {
+ return errWrongUsage
+ }
+ if err := openDB(fs.Arg(0)); err != nil {
+ return err
+ }
+
+ if _, err := db.Exec(initializeSQL); err != nil {
+ return err
+ }
+
+ // XXX: There's technically no reason to keep images as symlinks,
+ // we might just keep absolute paths in the database as well.
+ if err := os.MkdirAll(
+ filepath.Join(galleryDirectory, nameOfImageRoot), 0755); err != nil {
+ return err
+ }
+ if err := os.MkdirAll(
+ filepath.Join(galleryDirectory, nameOfThumbRoot), 0755); err != nil {
+ return err
+ }
+ return nil
+}
+
+// --- Web ---------------------------------------------------------------------
+
+var hashRE = regexp.MustCompile(`^/.*?/([0-9a-f]{40})$`)
+var staticHandler http.Handler
+
+var page = template.Must(template.New("/").Parse(`<!DOCTYPE html><html><head>
+ <title>Gallery</title>
+ <meta charset="utf-8" />
+ <meta name="viewport" content="width=device-width, initial-scale=1">
+ <link rel=stylesheet href=style.css>
+</head><body>
+ <noscript>This is a web application, and requires Javascript.</noscript>
+ <script src=mithril.js></script>
+ <script src=gallery.js></script>
+</body></html>`))
+
+func handleRequest(w http.ResponseWriter, r *http.Request) {
+ if r.URL.Path != "/" {
+ staticHandler.ServeHTTP(w, r)
+ return
+ }
+ if err := page.Execute(w, nil); err != nil {
+ log.Println(err)
+ }
+}
+
+func handleImages(w http.ResponseWriter, r *http.Request) {
+ if m := hashRE.FindStringSubmatch(r.URL.Path); m == nil {
+ http.NotFound(w, r)
+ } else {
+ http.ServeFile(w, r, imagePath(m[1]))
+ }
+}
+
+func handleThumbs(w http.ResponseWriter, r *http.Request) {
+ if m := hashRE.FindStringSubmatch(r.URL.Path); m == nil {
+ http.NotFound(w, r)
+ } else {
+ http.ServeFile(w, r, thumbPath(m[1]))
+ }
+}
+
+// - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
+
// getSubdirectories returns the names of all direct subdirectories of the
// node identified by parent, where zero identifies the root.  The result
// is never nil.
//
// Fix: the tx parameter was previously ignored -- the query went through
// dbCollectStrings on the global db handle, escaping the caller's open
// transaction (handleAPIBrowse reads subdirectories and entries together
// and expects one consistent snapshot).  Query through tx instead.
func getSubdirectories(tx *sql.Tx, parent int64) (names []string, err error) {
	rows, err := tx.Query(`SELECT name FROM node
		WHERE IFNULL(parent, 0) = ? AND sha1 IS NULL`, parent)
	if err != nil {
		return nil, err
	}
	defer rows.Close()

	names = []string{}
	for rows.Next() {
		var name string
		if err = rows.Scan(&name); err != nil {
			return nil, err
		}
		names = append(names, name)
	}
	return names, rows.Err()
}
+
+type webEntry struct {
+ SHA1 string `json:"sha1"`
+ Name string `json:"name"`
+ Modified int64 `json:"modified"`
+ ThumbW int64 `json:"thumbW"`
+ ThumbH int64 `json:"thumbH"`
+}
+
+func getSubentries(tx *sql.Tx, parent int64) (entries []webEntry, err error) {
+ rows, err := tx.Query(`
+ SELECT i.sha1, n.name, n.mtime, IFNULL(i.thumbw, 0), IFNULL(i.thumbh, 0)
+ FROM node AS n
+ JOIN image AS i ON n.sha1 = i.sha1
+ WHERE n.parent = ?`, parent)
+ if err != nil {
+ return nil, err
+ }
+ defer rows.Close()
+
+ entries = []webEntry{}
+ for rows.Next() {
+ var e webEntry
+ if err = rows.Scan(
+ &e.SHA1, &e.Name, &e.Modified, &e.ThumbW, &e.ThumbH); err != nil {
+ return nil, err
+ }
+ entries = append(entries, e)
+ }
+ return entries, rows.Err()
+}
+
// handleAPIBrowse resolves a directory path within the gallery, and returns
// both its subdirectories and its image entries as JSON, reading them in
// a single transaction so that the listing is a consistent snapshot.
func handleAPIBrowse(w http.ResponseWriter, r *http.Request) {
	var params struct {
		Path string
	}
	if err := json.NewDecoder(r.Body).Decode(&params); err != nil {
		http.Error(w, err.Error(), http.StatusBadRequest)
		return
	}

	var result struct {
		Subdirectories []string `json:"subdirectories"`
		Entries []webEntry `json:"entries"`
	}

	tx, err := db.Begin()
	if err != nil {
		http.Error(w, err.Error(), http.StatusInternalServerError)
		return
	}
	// Read-only usage; the rollback merely ends the transaction.
	defer tx.Rollback()

	parent, err := idForDirectoryPath(tx, decodeWebPath(params.Path), false)
	if err != nil {
		// NOTE(review): genuine database errors also surface as 404 here;
		// only sql.ErrNoRows really means "no such directory" -- consider
		// distinguishing the two.
		http.Error(w, err.Error(), http.StatusNotFound)
		return
	}

	result.Subdirectories, err = getSubdirectories(tx, parent)
	if err != nil {
		http.Error(w, err.Error(), http.StatusInternalServerError)
		return
	}
	result.Entries, err = getSubentries(tx, parent)
	if err != nil {
		http.Error(w, err.Error(), http.StatusInternalServerError)
		return
	}
	if err := json.NewEncoder(w).Encode(result); err != nil {
		log.Println(err)
	}
}
+
+// - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
+
+type webTagNamespace struct {
+ Description string `json:"description"`
+ Tags map[string]int64 `json:"tags"`
+}
+
+func getTags(nsID int64) (result map[string]int64, err error) {
+ rows, err := db.Query(`
+ SELECT t.name, COUNT(ta.tag) AS count
+ FROM tag AS t
+ LEFT JOIN tag_assignment AS ta ON t.id = ta.tag
+ WHERE t.space = ?
+ GROUP BY t.id`, nsID)
+ if err != nil {
+ return
+ }
+ defer rows.Close()
+
+ result = make(map[string]int64)
+ for rows.Next() {
+ var (
+ name string
+ count int64
+ )
+ if err = rows.Scan(&name, &count); err != nil {
+ return
+ }
+ result[name] = count
+ }
+ return result, rows.Err()
+}
+
+func getTagNamespaces(match *string) (
+ result map[string]webTagNamespace, err error) {
+ var rows *sql.Rows
+ if match != nil {
+ rows, err = db.Query(`SELECT id, name, IFNULL(description, '')
+ FROM tag_space WHERE name = ?`, *match)
+ } else {
+ rows, err = db.Query(`SELECT id, name, IFNULL(description, '')
+ FROM tag_space`)
+ }
+ if err != nil {
+ return
+ }
+ defer rows.Close()
+
+ result = make(map[string]webTagNamespace)
+ for rows.Next() {
+ var (
+ id int64
+ name string
+ ns webTagNamespace
+ )
+ if err = rows.Scan(&id, &name, &ns.Description); err != nil {
+ return
+ }
+ if ns.Tags, err = getTags(id); err != nil {
+ return
+ }
+ result[name] = ns
+ }
+ return result, rows.Err()
+}
+
+func handleAPITags(w http.ResponseWriter, r *http.Request) {
+ var params struct {
+ Namespace *string
+ }
+ if err := json.NewDecoder(r.Body).Decode(&params); err != nil {
+ http.Error(w, err.Error(), http.StatusBadRequest)
+ return
+ }
+
+ result, err := getTagNamespaces(params.Namespace)
+ if err != nil {
+ http.Error(w, err.Error(), http.StatusInternalServerError)
+ return
+ }
+
+ if err := json.NewEncoder(w).Encode(result); err != nil {
+ log.Println(err)
+ }
+}
+
+// - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
+
// webDuplicateImage describes one image within a group of duplicates.
type webDuplicateImage struct {
	SHA1 string `json:"sha1"`
	ThumbW int64 `json:"thumbW"`
	ThumbH int64 `json:"thumbH"`
	// Occurences counts the nodes referring to this image.
	// NOTE(review): "occurences" [sic] -- the misspelling is part of
	// the JSON API consumed by the frontend; renaming it would break it.
	Occurences int64 `json:"occurences"`
}
+
+// A hamming distance of zero (direct dhash match) will be more than sufficient.
+const duplicatesCTE = `WITH
+ duplicated(dhash, count) AS (
+ SELECT dhash, COUNT(*) AS count FROM image
+ WHERE dhash IS NOT NULL
+ GROUP BY dhash HAVING count > 1
+ ),
+ multipathed(sha1, count) AS (
+ SELECT n.sha1, COUNT(*) AS count FROM node AS n
+ JOIN image AS i ON i.sha1 = n.sha1
+ WHERE i.dhash IS NULL
+ OR i.dhash NOT IN (SELECT dhash FROM duplicated)
+ GROUP BY n.sha1 HAVING count > 1
+ )
+`
+
+func getDuplicatesSimilar(stmt *sql.Stmt, dhash int64) (
+ result []webDuplicateImage, err error) {
+ rows, err := stmt.Query(dhash)
+ if err != nil {
+ return nil, err
+ }
+ defer rows.Close()
+
+ result = []webDuplicateImage{}
+ for rows.Next() {
+ var image webDuplicateImage
+ if err = rows.Scan(&image.SHA1, &image.ThumbW, &image.ThumbH,
+ &image.Occurences); err != nil {
+ return nil, err
+ }
+ result = append(result, image)
+ }
+ return result, rows.Err()
+}
+
+func getDuplicates1(result [][]webDuplicateImage) (
+ [][]webDuplicateImage, error) {
+ stmt, err := db.Prepare(`
+ SELECT i.sha1, IFNULL(i.thumbw, 0), IFNULL(i.thumbh, 0),
+ COUNT(*) AS occurences
+ FROM image AS i
+ JOIN node AS n ON n.sha1 = i.sha1
+ WHERE i.dhash = ?
+ GROUP BY n.sha1`)
+ if err != nil {
+ return nil, err
+ }
+ defer stmt.Close()
+
+ rows, err := db.Query(duplicatesCTE + `SELECT dhash FROM duplicated`)
+ if err != nil {
+ return nil, err
+ }
+ defer rows.Close()
+
+ for rows.Next() {
+ var (
+ group []webDuplicateImage
+ dhash int64
+ )
+ if err = rows.Scan(&dhash); err != nil {
+ return nil, err
+ }
+ if group, err = getDuplicatesSimilar(stmt, dhash); err != nil {
+ return nil, err
+ }
+ result = append(result, group)
+ }
+ return result, rows.Err()
+}
+
+func getDuplicates2(result [][]webDuplicateImage) (
+ [][]webDuplicateImage, error) {
+ stmt, err := db.Prepare(`
+ SELECT i.sha1, IFNULL(i.thumbw, 0), IFNULL(i.thumbh, 0),
+ COUNT(*) AS occurences
+ FROM image AS i
+ JOIN node AS n ON n.sha1 = i.sha1
+ WHERE i.sha1 = ?
+ GROUP BY n.sha1`)
+ if err != nil {
+ return nil, err
+ }
+ defer stmt.Close()
+
+ rows, err := db.Query(duplicatesCTE + `SELECT sha1 FROM multipathed`)
+ if err != nil {
+ return nil, err
+ }
+ defer rows.Close()
+
+ for rows.Next() {
+ var (
+ image webDuplicateImage
+ sha1 string
+ )
+ if err = rows.Scan(&sha1); err != nil {
+ return nil, err
+ }
+ if err := stmt.QueryRow(sha1).Scan(&image.SHA1,
+ &image.ThumbW, &image.ThumbH, &image.Occurences); err != nil {
+ return nil, err
+ }
+ result = append(result, []webDuplicateImage{image})
+ }
+ return result, rows.Err()
+}
+
+func handleAPIDuplicates(w http.ResponseWriter, r *http.Request) {
+ var params struct{}
+ if err := json.NewDecoder(r.Body).Decode(&params); err != nil {
+ http.Error(w, err.Error(), http.StatusBadRequest)
+ return
+ }
+
+ var (
+ result = [][]webDuplicateImage{}
+ err error
+ )
+ if result, err = getDuplicates1(result); err != nil {
+ http.Error(w, err.Error(), http.StatusInternalServerError)
+ return
+ }
+ if result, err = getDuplicates2(result); err != nil {
+ http.Error(w, err.Error(), http.StatusInternalServerError)
+ return
+ }
+
+ if err := json.NewEncoder(w).Encode(result); err != nil {
+ log.Println(err)
+ }
+}
+
+// - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
+
+type webOrphanImage struct {
+ SHA1 string `json:"sha1"`
+ ThumbW int64 `json:"thumbW"`
+ ThumbH int64 `json:"thumbH"`
+ Tags int64 `json:"tags"`
+}
+
+type webOrphan struct {
+ webOrphanImage
+ LastPath string `json:"lastPath"`
+ Replacement *webOrphanImage `json:"replacement"`
+}
+
+func getOrphanReplacement(webPath string) (*webOrphanImage, error) {
+ tx, err := db.Begin()
+ if err != nil {
+ return nil, err
+ }
+ defer tx.Rollback()
+
+ path := decodeWebPath(webPath)
+ if len(path) == 0 {
+ return nil, nil
+ }
+
+ parent, err := idForDirectoryPath(tx, path[:len(path)-1], false)
+ if err != nil {
+ return nil, err
+ }
+
+ var image webOrphanImage
+ err = db.QueryRow(`SELECT i.sha1,
+ IFNULL(i.thumbw, 0), IFNULL(i.thumbh, 0), COUNT(ta.sha1) AS tags
+ FROM node AS n
+ JOIN image AS i ON n.sha1 = i.sha1
+ LEFT JOIN tag_assignment AS ta ON n.sha1 = ta.sha1
+ WHERE n.parent = ? AND n.name = ?
+ GROUP BY n.sha1`, parent, path[len(path)-1]).Scan(
+ &image.SHA1, &image.ThumbW, &image.ThumbH, &image.Tags)
+ if errors.Is(err, sql.ErrNoRows) {
+ return nil, nil
+ } else if err != nil {
+ return nil, err
+ }
+ return &image, nil
+}
+
+func getOrphans() (result []webOrphan, err error) {
+ rows, err := db.Query(`SELECT o.sha1, o.path,
+ IFNULL(i.thumbw, 0), IFNULL(i.thumbh, 0), COUNT(ta.sha1) AS tags
+ FROM orphan AS o
+ JOIN image AS i ON o.sha1 = i.sha1
+ LEFT JOIN tag_assignment AS ta ON o.sha1 = ta.sha1
+ GROUP BY o.sha1`)
+ if err != nil {
+ return nil, err
+ }
+ defer rows.Close()
+
+ result = []webOrphan{}
+ for rows.Next() {
+ var orphan webOrphan
+ if err = rows.Scan(&orphan.SHA1, &orphan.LastPath,
+ &orphan.ThumbW, &orphan.ThumbH, &orphan.Tags); err != nil {
+ return nil, err
+ }
+
+ orphan.Replacement, err = getOrphanReplacement(orphan.LastPath)
+ if err != nil {
+ return nil, err
+ }
+
+ result = append(result, orphan)
+ }
+ return result, rows.Err()
+}
+
+func handleAPIOrphans(w http.ResponseWriter, r *http.Request) {
+ var params struct{}
+ if err := json.NewDecoder(r.Body).Decode(&params); err != nil {
+ http.Error(w, err.Error(), http.StatusBadRequest)
+ return
+ }
+
+ result, err := getOrphans()
+ if err != nil {
+ http.Error(w, err.Error(), http.StatusInternalServerError)
+ return
+ }
+
+ if err := json.NewEncoder(w).Encode(result); err != nil {
+ log.Println(err)
+ }
+}
+
+// - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
+
+func getImageDimensions(sha1 string) (w int64, h int64, err error) {
+ err = db.QueryRow(`SELECT width, height FROM image WHERE sha1 = ?`,
+ sha1).Scan(&w, &h)
+ return
+}
+
// getImagePaths returns every path within the gallery tree that currently
// refers to the image with the given SHA-1: the recursive CTE starts at
// each referencing node and walks up through parents, prepending names,
// keeping only the walks that reached the root (NULL parent).
// Paths are slash-separated without a leading slash; the result is never nil.
func getImagePaths(sha1 string) (paths []string, err error) {
	rows, err := db.Query(`WITH RECURSIVE paths(parent, path) AS (
		SELECT parent, name AS path FROM node WHERE sha1 = ?
		UNION ALL
		SELECT n.parent, n.name || '/' || p.path
		FROM node AS n JOIN paths AS p ON n.id = p.parent
	) SELECT path FROM paths WHERE parent IS NULL`, sha1)
	if err != nil {
		return nil, err
	}
	defer rows.Close()

	paths = []string{}
	for rows.Next() {
		var path string
		if err := rows.Scan(&path); err != nil {
			return nil, err
		}
		paths = append(paths, path)
	}
	return paths, rows.Err()
}
+
+func getImageTags(sha1 string) (map[string]map[string]float32, error) {
+ rows, err := db.Query(`
+ SELECT ts.name, t.name, ta.weight FROM tag_assignment AS ta
+ JOIN tag AS t ON t.id = ta.tag
+ JOIN tag_space AS ts ON ts.id = t.space
+ WHERE ta.sha1 = ?`, sha1)
+ if err != nil {
+ return nil, err
+ }
+ defer rows.Close()
+
+ result := make(map[string]map[string]float32)
+ for rows.Next() {
+ var (
+ space, tag string
+ weight float32
+ )
+ if err := rows.Scan(&space, &tag, &weight); err != nil {
+ return nil, err
+ }
+
+ tags := result[space]
+ if tags == nil {
+ tags = make(map[string]float32)
+ result[space] = tags
+ }
+ tags[tag] = weight
+ }
+ return result, rows.Err()
+}
+
+func handleAPIInfo(w http.ResponseWriter, r *http.Request) {
+ var params struct {
+ SHA1 string
+ }
+ if err := json.NewDecoder(r.Body).Decode(&params); err != nil {
+ http.Error(w, err.Error(), http.StatusBadRequest)
+ return
+ }
+
+ var result struct {
+ Width int64 `json:"width"`
+ Height int64 `json:"height"`
+ Paths []string `json:"paths"`
+ Tags map[string]map[string]float32 `json:"tags"`
+ }
+
+ var err error
+ result.Width, result.Height, err = getImageDimensions(params.SHA1)
+ if errors.Is(err, sql.ErrNoRows) {
+ http.Error(w, err.Error(), http.StatusNotFound)
+ return
+ } else if err != nil {
+ http.Error(w, err.Error(), http.StatusInternalServerError)
+ return
+ }
+
+ result.Paths, err = getImagePaths(params.SHA1)
+ if err != nil {
+ http.Error(w, err.Error(), http.StatusInternalServerError)
+ return
+ }
+ result.Tags, err = getImageTags(params.SHA1)
+ if err != nil {
+ http.Error(w, err.Error(), http.StatusInternalServerError)
+ return
+ }
+ if err := json.NewEncoder(w).Encode(result); err != nil {
+ log.Println(err)
+ }
+}
+
+// - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
+
+type webSimilarImage struct {
+ SHA1 string `json:"sha1"`
+ PixelsRatio float32 `json:"pixelsRatio"`
+ ThumbW int64 `json:"thumbW"`
+ ThumbH int64 `json:"thumbH"`
+ Paths []string `json:"paths"`
+}
+
+func getSimilar(sha1 string, dhash int64, pixels int64, distance int) (
+ result []webSimilarImage, err error) {
+ // For distance ∈ {0, 1}, this query is quite inefficient.
+ // In exchange, it's generic.
+ //
+ // If there's a dhash, there should also be thumbnail dimensions,
+ // so not bothering with IFNULL on them.
+ rows, err := db.Query(`
+ SELECT sha1, width * height, IFNULL(thumbw, 0), IFNULL(thumbh, 0)
+ FROM image WHERE sha1 <> ? AND dhash IS NOT NULL
+ AND hamming(dhash, ?) = ?`, sha1, dhash, distance)
+ if err != nil {
+ return nil, err
+ }
+ defer rows.Close()
+
+ result = []webSimilarImage{}
+ for rows.Next() {
+ var (
+ match webSimilarImage
+ matchPixels int64
+ )
+ if err = rows.Scan(&match.SHA1,
+ &matchPixels, &match.ThumbW, &match.ThumbH); err != nil {
+ return nil, err
+ }
+ if match.Paths, err = getImagePaths(match.SHA1); err != nil {
+ return nil, err
+ }
+ match.PixelsRatio = float32(matchPixels) / float32(pixels)
+ result = append(result, match)
+ }
+ return result, rows.Err()
+}
+
+func getSimilarGroups(sha1 string, dhash int64, pixels int64,
+ output map[string][]webSimilarImage) error {
+ var err error
+ for distance := 0; distance <= 1; distance++ {
+ output[fmt.Sprintf("Perceptual distance %d", distance)], err =
+ getSimilar(sha1, dhash, pixels, distance)
+ if err != nil {
+ return err
+ }
+ }
+ return nil
+}
+
+func handleAPISimilar(w http.ResponseWriter, r *http.Request) {
+ var params struct {
+ SHA1 string
+ }
+ if err := json.NewDecoder(r.Body).Decode(&params); err != nil {
+ http.Error(w, err.Error(), http.StatusBadRequest)
+ return
+ }
+
+ var result struct {
+ Info webSimilarImage `json:"info"`
+ Groups map[string][]webSimilarImage `json:"groups"`
+ }
+
+ result.Info = webSimilarImage{SHA1: params.SHA1, PixelsRatio: 1}
+ if paths, err := getImagePaths(params.SHA1); err != nil {
+ http.Error(w, err.Error(), http.StatusInternalServerError)
+ return
+ } else {
+ result.Info.Paths = paths
+ }
+
+ var (
+ width, height int64
+ dhash sql.NullInt64
+ )
+ err := db.QueryRow(`
+ SELECT width, height, dhash, IFNULL(thumbw, 0), IFNULL(thumbh, 0)
+ FROM image WHERE sha1 = ?`, params.SHA1).Scan(&width, &height, &dhash,
+ &result.Info.ThumbW, &result.Info.ThumbH)
+ if errors.Is(err, sql.ErrNoRows) {
+ http.Error(w, err.Error(), http.StatusNotFound)
+ return
+ } else if err != nil {
+ http.Error(w, err.Error(), http.StatusInternalServerError)
+ return
+ }
+
+ result.Groups = make(map[string][]webSimilarImage)
+ if dhash.Valid {
+ if err := getSimilarGroups(
+ params.SHA1, dhash.Int64, width*height, result.Groups); err != nil {
+ http.Error(w, err.Error(), http.StatusInternalServerError)
+ return
+ }
+ }
+
+ if err := json.NewEncoder(w).Encode(result); err != nil {
+ log.Println(err)
+ }
+}
+
+// - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
+
+// NOTE: AND will mean MULTIPLY(IFNULL(ta.weight, 0)) per SHA1.
+const searchCTE = `WITH
+ matches(sha1, thumbw, thumbh, score) AS (
+ SELECT i.sha1, i.thumbw, i.thumbh, ta.weight AS score
+ FROM tag_assignment AS ta
+ JOIN image AS i ON i.sha1 = ta.sha1
+ WHERE ta.tag = ?
+ ),
+ supertags(tag) AS (
+ SELECT DISTINCT ta.tag
+ FROM tag_assignment AS ta
+ JOIN matches AS m ON m.sha1 = ta.sha1
+ ),
+ scoredtags(tag, score) AS (
+ -- The cross join is a deliberate optimization,
+ -- and this query may still be really slow.
+ SELECT st.tag, AVG(IFNULL(ta.weight, 0)) AS score
+ FROM matches AS m
+ CROSS JOIN supertags AS st
+ LEFT JOIN tag_assignment AS ta
+ ON ta.sha1 = m.sha1 AND ta.tag = st.tag
+ GROUP BY st.tag
+ -- Using the column alias doesn't fail, but it also doesn't work.
+ HAVING AVG(IFNULL(ta.weight, 0)) >= 0.01
+ )
+`
+
+type webTagMatch struct {
+ SHA1 string `json:"sha1"`
+ ThumbW int64 `json:"thumbW"`
+ ThumbH int64 `json:"thumbH"`
+ Score float32 `json:"score"`
+}
+
+// getTagMatches returns all images assigned the given tag ID,
+// together with their thumbnail dimensions and the assignment weight.
+func getTagMatches(tag int64) (matches []webTagMatch, err error) {
+	rows, err := db.Query(searchCTE+`
+		SELECT sha1, IFNULL(thumbw, 0), IFNULL(thumbh, 0), score
+		FROM matches`, tag)
+	if err != nil {
+		return nil, err
+	}
+	defer rows.Close()
+
+	// Use a non-nil slice, so that an empty result encodes as [] in JSON.
+	matches = []webTagMatch{}
+	for rows.Next() {
+		var match webTagMatch
+		if err = rows.Scan(&match.SHA1,
+			&match.ThumbW, &match.ThumbH, &match.Score); err != nil {
+			return nil, err
+		}
+		matches = append(matches, match)
+	}
+	return matches, rows.Err()
+}
+
+// webTagRelated is one related-tag entry within a tag space.
+type webTagRelated struct {
+	Tag   string  `json:"tag"`
+	Score float32 `json:"score"`
+}
+
+// getTagRelated returns tags co-occurring with the given tag ID,
+// grouped by tag space, in order of decreasing score.
+func getTagRelated(tag int64) (result map[string][]webTagRelated, err error) {
+	rows, err := db.Query(searchCTE+`
+		SELECT ts.name, t.name, st.score FROM scoredtags AS st
+		JOIN tag AS t ON st.tag = t.id
+		JOIN tag_space AS ts ON ts.id = t.space
+		ORDER BY st.score DESC`, tag)
+	if err != nil {
+		return nil, err
+	}
+	defer rows.Close()
+
+	result = make(map[string][]webTagRelated)
+	for rows.Next() {
+		var (
+			space string
+			r     webTagRelated
+		)
+		if err = rows.Scan(&space, &r.Tag, &r.Score); err != nil {
+			return nil, err
+		}
+		result[space] = append(result[space], r)
+	}
+	return result, rows.Err()
+}
+
+// handleAPISearch resolves the tag given in the query as "space:name",
+// and responds with the matching images as well as related tags.
+func handleAPISearch(w http.ResponseWriter, r *http.Request) {
+	var params struct {
+		Query string
+	}
+	if err := json.NewDecoder(r.Body).Decode(&params); err != nil {
+		http.Error(w, err.Error(), http.StatusBadRequest)
+		return
+	}
+
+	var result struct {
+		Matches []webTagMatch              `json:"matches"`
+		Related map[string][]webTagRelated `json:"related"`
+	}
+
+	// Without a colon, the whole query is taken to be the space name,
+	// and the tag name is empty.
+	space, tag, _ := strings.Cut(params.Query, ":")
+
+	var tagID int64
+	err := db.QueryRow(`
+		SELECT t.id FROM tag AS t
+		JOIN tag_space AS ts ON t.space = ts.id
+		WHERE ts.name = ? AND t.name = ?`, space, tag).Scan(&tagID)
+	if errors.Is(err, sql.ErrNoRows) {
+		http.Error(w, err.Error(), http.StatusNotFound)
+		return
+	} else if err != nil {
+		http.Error(w, err.Error(), http.StatusInternalServerError)
+		return
+	}
+
+	if result.Matches, err = getTagMatches(tagID); err != nil {
+		http.Error(w, err.Error(), http.StatusInternalServerError)
+		return
+	}
+	if result.Related, err = getTagRelated(tagID); err != nil {
+		http.Error(w, err.Error(), http.StatusInternalServerError)
+		return
+	}
+
+	if err := json.NewEncoder(w).Encode(result); err != nil {
+		log.Println(err)
+	}
+}
+
+// - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
+
+// cmdWeb runs a web UI against GD on ADDRESS.
+func cmdWeb(fs *flag.FlagSet, args []string) error {
+	if err := fs.Parse(args); err != nil {
+		return err
+	}
+	if fs.NArg() != 2 {
+		return errWrongUsage
+	}
+	if err := openDB(fs.Arg(0)); err != nil {
+		return err
+	}
+
+	address := fs.Arg(1)
+
+	// This separation is not strictly necessary,
+	// but having an elementary level of security doesn't hurt either.
+	staticHandler = http.FileServer(http.Dir("public"))
+
+	http.HandleFunc("/", handleRequest)
+	http.HandleFunc("/image/", handleImages)
+	http.HandleFunc("/thumb/", handleThumbs)
+	http.HandleFunc("/api/browse", handleAPIBrowse)
+	http.HandleFunc("/api/tags", handleAPITags)
+	http.HandleFunc("/api/duplicates", handleAPIDuplicates)
+	http.HandleFunc("/api/orphans", handleAPIOrphans)
+	http.HandleFunc("/api/info", handleAPIInfo)
+	http.HandleFunc("/api/similar", handleAPISimilar)
+	http.HandleFunc("/api/search", handleAPISearch)
+
+	// Print a usable URL, substituting localhost for an empty host part.
+	host, port, err := net.SplitHostPort(address)
+	if err != nil {
+		log.Println(err)
+	} else if host == "" {
+		log.Println("http://" + net.JoinHostPort("localhost", port))
+	} else {
+		log.Println("http://" + address)
+	}
+
+	s := &http.Server{
+		Addr:           address,
+		ReadTimeout:    60 * time.Second,
+		WriteTimeout:   60 * time.Second,
+		MaxHeaderBytes: 32 << 10,
+	}
+	return s.ListenAndServe()
+}
+
+// --- Sync --------------------------------------------------------------------
+
+// syncFileInfo carries the result of asynchronously scanning one file.
+type syncFileInfo struct {
+	dbID     int64  // DB node ID, or zero if there was none
+	dbParent int64  // where the file was to be stored
+	dbName   string // the name under which it was to be stored
+	fsPath   string // symlink target
+	fsMtime  int64  // last modified Unix timestamp, used a bit like an ID
+
+	err    error  // any processing error
+	sha1   string // raw content hash, empty to skip file
+	width  int    // image width in pixels
+	height int    // image height in pixels
+}
+
+// syncContext holds state shared across one synchronization run.
+type syncContext struct {
+	ctx  context.Context
+	tx   *sql.Tx
+	info chan syncFileInfo
+	pb   *progressBar
+
+	// Prepared statements backing syncDispose().
+	stmtOrphan     *sql.Stmt
+	stmtDisposeSub *sql.Stmt
+	stmtDisposeAll *sql.Stmt
+
+	// linked tracks which image hashes we've checked so far in the run.
+	linked map[string]struct{}
+}
+
+// syncPrintf logs a formatted message without corrupting the progress bar.
+func syncPrintf(c *syncContext, format string, v ...any) {
+	c.pb.Interrupt(func() { log.Printf(format+"\n", v...) })
+}
+
+// - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
+
+// syncNode is a directory entry as recorded in the database.
+type syncNode struct {
+	dbID    int64
+	dbName  string
+	dbMtime int64
+	dbSHA1  string
+}
+
+// dbIsDir tells whether the DB node is a directory,
+// which is encoded as the absence of a content hash.
+func (n *syncNode) dbIsDir() bool { return n.dbSHA1 == "" }
+
+// syncFile is a directory entry as found on the filesystem.
+type syncFile struct {
+	fsName  string
+	fsMtime int64
+	fsIsDir bool
+}
+
+// syncPair pairs a DB node with its namesake filesystem entry;
+// either side may be nil when there is no counterpart.
+type syncPair struct {
+	db *syncNode
+	fs *syncFile
+}
+
+// syncGetNodes returns direct children of a DB node, ordered by name.
+// SQLite, like Go, compares strings byte-wise by default.
+func syncGetNodes(tx *sql.Tx, dbParent int64) (nodes []syncNode, err error) {
+	// This works even for the root, which doesn't exist as a DB node.
+	// (A NULL parent is normalized to match the bound zero.)
+	rows, err := tx.Query(`SELECT id, name, IFNULL(mtime, 0), IFNULL(sha1, '')
+		FROM node WHERE IFNULL(parent, 0) = ? ORDER BY name`, dbParent)
+	if err != nil {
+		return
+	}
+	defer rows.Close()
+
+	for rows.Next() {
+		var node syncNode
+		if err = rows.Scan(&node.dbID,
+			&node.dbName, &node.dbMtime, &node.dbSHA1); err != nil {
+			return
+		}
+		nodes = append(nodes, node)
+	}
+	return nodes, rows.Err()
+}
+
+// syncGetFiles returns direct children of a FS directory, ordered by name.
+func syncGetFiles(fsPath string) (files []syncFile, err error) {
+	dir, err := os.Open(fsPath)
+	if err != nil {
+		return
+	}
+	defer dir.Close()
+
+	// A count of 0 requests all directory entries at once.
+	entries, err := dir.ReadDir(0)
+	if err != nil {
+		return
+	}
+
+	for _, entry := range entries {
+		info, err := entry.Info()
+		if err != nil {
+			return files, err
+		}
+
+		files = append(files, syncFile{
+			fsName:  entry.Name(),
+			fsMtime: info.ModTime().Unix(),
+			fsIsDir: entry.IsDir(),
+		})
+	}
+	// Sort by name, to match the ordering of syncGetNodes().
+	sort.Slice(files,
+		func(a, b int) bool { return files[a].fsName < files[b].fsName })
+	return
+}
+
+// - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
+
+// syncIsImage reports whether the file at path has an image MIME type,
+// as determined by the xdg-mime utility.
+func syncIsImage(path string) (bool, error) {
+	out, err := exec.Command("xdg-mime", "query", "filetype", path).Output()
+	if err != nil {
+		return false, err
+	}
+
+	return bytes.HasPrefix(out, []byte("image/")), nil
+}
+
+// syncPingImage returns the pixel dimensions of the image at path,
+// as reported by ImageMagick for its first frame/page ("[0]").
+func syncPingImage(path string) (int, int, error) {
+	out, err := exec.Command("magick", "identify", "-limit", "thread", "1",
+		"-ping", "-format", "%w %h", path+"[0]").Output()
+	if err != nil {
+		return 0, 0, err
+	}
+
+	var w, h int
+	_, err = fmt.Fscanf(bytes.NewReader(out), "%d %d", &w, &h)
+	return w, h, err
+}
+
+// syncProcess fills in the sha1, width, and height fields of info
+// for the file at info.fsPath. The sha1 field is left empty
+// for files that are to be skipped entirely (i.e., non-images).
+func syncProcess(c *syncContext, info *syncFileInfo) error {
+	// Skip videos, which ImageMagick can process, but we don't want it to,
+	// so that they're not converted 1:1 to WebP.
+	pathIsImage, err := syncIsImage(info.fsPath)
+	if err != nil {
+		return err
+	}
+	if !pathIsImage {
+		return nil
+	}
+
+	info.width, info.height, err = syncPingImage(info.fsPath)
+	if err != nil {
+		return err
+	}
+
+	f, err := os.Open(info.fsPath)
+	if err != nil {
+		return err
+	}
+	defer f.Close()
+
+	// We could make this at least somewhat interruptible by c.ctx,
+	// though it would still work poorly.
+	hash := sha1.New()
+	_, err = io.CopyBuffer(hash, f, make([]byte, 65536))
+	if err != nil {
+		return err
+	}
+
+	info.sha1 = hex.EncodeToString(hash.Sum(nil))
+	return nil
+}
+
+// syncEnqueue runs file scanning, which can be CPU and I/O expensive,
+// in parallel. The goroutine only touches the filesystem, read-only.
+func syncEnqueue(c *syncContext, info syncFileInfo) error {
+	if err := taskSemaphore.acquire(c.ctx); err != nil {
+		return err
+	}
+
+	go func(info syncFileInfo) {
+		// The semaphore slot is released by the task itself when done.
+		defer taskSemaphore.release()
+		info.err = syncProcess(c, &info)
+		c.info <- info
+	}(info)
+	return nil
+}
+
+// syncDequeue flushes the result queue of finished asynchronous tasks.
+func syncDequeue(c *syncContext) error {
+	for {
+		select {
+		case <-c.ctx.Done():
+			return c.ctx.Err()
+		case info := <-c.info:
+			if err := syncPostProcess(c, info); err != nil {
+				return err
+			}
+		default:
+			// Nothing ready in the queue right now.
+			return nil
+		}
+	}
+}
+
+// syncDispose creates orphan records for the entire subtree given by nodeID
+// as appropriate, then deletes all nodes within the subtree. The subtree root
+// node is not deleted if "keepNode" is true.
+//
+// Orphans keep their thumbnail files, as evidence.
+func syncDispose(c *syncContext, nodeID int64, keepNode bool) error {
+	if _, err := c.stmtOrphan.Exec(nodeID); err != nil {
+		return err
+	}
+
+	if keepNode {
+		// Delete the node's children only.
+		if _, err := c.stmtDisposeSub.Exec(nodeID); err != nil {
+			return err
+		}
+	} else {
+		// Delete the node together with all of its children.
+		if _, err := c.stmtDisposeAll.Exec(nodeID); err != nil {
+			return err
+		}
+	}
+	return nil
+}
+
+// syncImageResave (re)creates a symlink to target at path,
+// first removing whatever may be standing in its place.
+func syncImageResave(c *syncContext, path string, target string) error {
+	dirname, _ := filepath.Split(path)
+	if err := os.MkdirAll(dirname, 0755); err != nil {
+		return err
+	}
+
+	// Loop, in case something reappears in between the two calls.
+	for {
+		// Try to remove anything standing in the way.
+		err := os.Remove(path)
+		if err != nil && !errors.Is(err, os.ErrNotExist) {
+			return err
+		}
+
+		// TODO: Make it possible to copy or reflink (ioctl FICLONE).
+		err = os.Symlink(target, path)
+		if err == nil || !errors.Is(err, fs.ErrExist) {
+			return err
+		}
+	}
+}
+
+// syncImageSave ensures that the content-addressed location for the given
+// hash resolves to target. Each hash is only checked once per run (c.linked).
+func syncImageSave(c *syncContext, sha1 string, target string) error {
+	if _, ok := c.linked[sha1]; ok {
+		return nil
+	}
+
+	ok, path := false, imagePath(sha1)
+	if link, err := os.Readlink(path); err == nil {
+		ok = link == target
+	} else {
+		// If it exists, but it is not a symlink, let it be.
+		// Even though it may not be a regular file.
+		ok = errors.Is(err, syscall.EINVAL)
+	}
+
+	if !ok {
+		if err := syncImageResave(c, path, target); err != nil {
+			return err
+		}
+	}
+
+	c.linked[sha1] = struct{}{}
+	return nil
+}
+
+// syncImage registers the image's dimensions in the database, if not
+// already known, and links its content into the store (see syncImageSave).
+func syncImage(c *syncContext, info syncFileInfo) error {
+	if _, err := c.tx.Exec(`INSERT INTO image(sha1, width, height)
+		VALUES (?, ?, ?) ON CONFLICT(sha1) DO NOTHING`,
+		info.sha1, info.width, info.height); err != nil {
+		return err
+	}
+
+	return syncImageSave(c, info.sha1, info.fsPath)
+}
+
+// syncPostProcess integrates the result of asynchronous file processing
+// into the database. The case comments use the same notation as
+// syncDirectoryPair(): 0 = absent, F = file, D = directory,
+// with the DB state on the left and the filesystem state on the right.
+func syncPostProcess(c *syncContext, info syncFileInfo) error {
+	defer c.pb.Step()
+
+	// TODO: When replacing an image node (whether it has or doesn't have
+	// other links to keep it alive), we could offer copying all tags,
+	// though this needs another table to track it.
+	// (If it's equivalent enough, the dhash will stay the same,
+	// so user can resolve this through the duplicates feature.)
+	switch {
+	case info.err != nil:
+		// * → error
+		// Subprocess failures are logged and treated as skipped files;
+		// any other error aborts the run.
+		if ee, ok := info.err.(*exec.ExitError); ok {
+			syncPrintf(c, "%s: %s", info.fsPath, ee.Stderr)
+		} else {
+			return info.err
+		}
+		fallthrough
+
+	case info.sha1 == "":
+		// 0 → 0
+		if info.dbID == 0 {
+			return nil
+		}
+
+		// D → 0, F → 0
+		// TODO: Make it possible to disable removal (for copying only?)
+		return syncDispose(c, info.dbID, false /*keepNode*/)
+
+	case info.dbID == 0:
+		// 0 → F
+		if err := syncImage(c, info); err != nil {
+			return err
+		}
+		if _, err := c.tx.Exec(`INSERT INTO node(parent, name, mtime, sha1)
+			VALUES (?, ?, ?, ?)`,
+			info.dbParent, info.dbName, info.fsMtime, info.sha1); err != nil {
+			return err
+		}
+		return nil
+
+	default:
+		// D → F, F → F (this statement is a no-op with the latter)
+		if err := syncDispose(c, info.dbID, true /*keepNode*/); err != nil {
+			return err
+		}
+
+		// Even if the hash didn't change, see comment in syncDirectoryPair().
+		if err := syncImage(c, info); err != nil {
+			return err
+		}
+		if _, err := c.tx.Exec(`UPDATE node SET mtime = ?, sha1 = ?
+			WHERE id = ?`, info.fsMtime, info.sha1, info.dbID); err != nil {
+			return err
+		}
+		return nil
+	}
+}
+
+// syncDirectoryPair reconciles one name-matched pair of entries within a
+// directory (0 = absent, F = file, D = directory; DB state on the left,
+// filesystem state on the right). Directories are recursed into
+// synchronously, while files are enqueued for asynchronous processing.
+func syncDirectoryPair(c *syncContext, dbParent int64, fsPath string,
+	pair syncPair) error {
+	db, fs, fsInfo := pair.db, pair.fs, syncFileInfo{dbParent: dbParent}
+	if db != nil {
+		fsInfo.dbID = db.dbID
+	}
+	if fs != nil {
+		fsInfo.dbName = fs.fsName
+		fsInfo.fsPath = filepath.Join(fsPath, fs.fsName)
+		fsInfo.fsMtime = fs.fsMtime
+	}
+
+	switch {
+	case db == nil && fs == nil:
+		// 0 → 0, unreachable.
+
+	case db == nil && fs.fsIsDir:
+		// 0 → D
+		var id int64
+		if result, err := c.tx.Exec(`INSERT INTO node(parent, name)
+			VALUES (?, ?)`, dbParent, fs.fsName); err != nil {
+			return err
+		} else if id, err = result.LastInsertId(); err != nil {
+			return err
+		}
+		return syncDirectory(c, id, fsInfo.fsPath)
+
+	case db == nil:
+		// 0 → F (or 0 → 0)
+		return syncEnqueue(c, fsInfo)
+
+	case fs == nil:
+		// D → 0, F → 0
+		// TODO: Make it possible to disable removal (for copying only?)
+		return syncDispose(c, db.dbID, false /*keepNode*/)
+
+	case db.dbIsDir() && fs.fsIsDir:
+		// D → D
+		return syncDirectory(c, db.dbID, fsInfo.fsPath)
+
+	case db.dbIsDir():
+		// D → F (or D → 0)
+		return syncEnqueue(c, fsInfo)
+
+	case fs.fsIsDir:
+		// F → D
+		if err := syncDispose(c, db.dbID, true /*keepNode*/); err != nil {
+			return err
+		}
+		if _, err := c.tx.Exec(`UPDATE node
+			SET mtime = NULL, sha1 = NULL WHERE id = ?`, db.dbID); err != nil {
+			return err
+		}
+		return syncDirectory(c, db.dbID, fsInfo.fsPath)
+
+	case db.dbMtime != fs.fsMtime:
+		// F → F (or F → 0)
+		// Assuming that any content modifications change the timestamp.
+		return syncEnqueue(c, fsInfo)
+
+	default:
+		// F → F
+		// Try to fix symlinks, to handle the following situations:
+		// 1. Image A occurs in paths 1 and 2, we use a symlink to path 1,
+		//    and path 1 is removed from the filesystem:
+		//    path 2 would not resolve if the mtime didn't change.
+		// 2. Image A occurs in paths 1 and 2, we use a symlink to path 1,
+		//    and path 1 is changed:
+		//    path 2 would resolve to the wrong file.
+		// This may relink images with multiple occurrences unnecessarily,
+		// but it will always fix the roots that are being synced.
+		if err := syncImageSave(c, db.dbSHA1, fsInfo.fsPath); err != nil {
+			return err
+		}
+	}
+	return nil
+}
+
+// syncDirectory synchronizes the filesystem directory at fsPath
+// against the database subtree rooted at node dbParent.
+func syncDirectory(c *syncContext, dbParent int64, fsPath string) error {
+	db, err := syncGetNodes(c.tx, dbParent)
+	if err != nil {
+		return err
+	}
+
+	fs, err := syncGetFiles(fsPath)
+	if err != nil {
+		return err
+	}
+
+	// This would not be fatal, but it has annoying consequences.
+	if _, ok := slices.BinarySearchFunc(fs, syncFile{fsName: nameOfDB},
+		func(a, b syncFile) int {
+			return strings.Compare(a.fsName, b.fsName)
+		}); ok {
+		syncPrintf(c, "%s may be a gallery directory, treating as empty",
+			fsPath)
+		fs = nil
+	}
+
+	// Convert differences to a form more convenient for processing:
+	// merge the two name-ordered listings into pairs.
+	iDB, iFS, pairs := 0, 0, []syncPair{}
+	for iDB < len(db) && iFS < len(fs) {
+		if db[iDB].dbName == fs[iFS].fsName {
+			pairs = append(pairs, syncPair{&db[iDB], &fs[iFS]})
+			iDB++
+			iFS++
+		} else if db[iDB].dbName < fs[iFS].fsName {
+			pairs = append(pairs, syncPair{&db[iDB], nil})
+			iDB++
+		} else {
+			pairs = append(pairs, syncPair{nil, &fs[iFS]})
+			iFS++
+		}
+	}
+	for i := range db[iDB:] {
+		pairs = append(pairs, syncPair{&db[iDB+i], nil})
+	}
+	for i := range fs[iFS:] {
+		pairs = append(pairs, syncPair{nil, &fs[iFS+i]})
+	}
+
+	for _, pair := range pairs {
+		// Take the opportunity to collect finished task results,
+		// so that the info channel doesn't fill up.
+		if err := syncDequeue(c); err != nil {
+			return err
+		}
+		if err := syncDirectoryPair(c, dbParent, fsPath, pair); err != nil {
+			return err
+		}
+	}
+	return nil
+}
+
+// syncRoot synchronizes one root: the filesystem tree at fsPath
+// against the database subtree resolved from the node path dbPath.
+func syncRoot(c *syncContext, dbPath []string, fsPath string) error {
+	// TODO: Support synchronizing individual files.
+	// This can only be treated as 0 → F, F → F, or D → F, that is,
+	// a variation on current syncEnqueue(), but dbParent must be nullable.
+
+	// Figure out a database root (not trying to convert F → D on conflict,
+	// also because we don't know yet if the argument is a directory).
+	//
+	// Synchronizing F → D or * → F are special cases not worth implementing.
+	dbParent, err := idForDirectoryPath(c.tx, dbPath, true)
+	if err != nil {
+		return err
+	}
+	if err := syncDirectory(c, dbParent, fsPath); err != nil {
+		return err
+	}
+
+	// Wait for all tasks to finish, and process the results of their work.
+	for i := 0; i < cap(taskSemaphore); i++ {
+		if err := taskSemaphore.acquire(c.ctx); err != nil {
+			return err
+		}
+	}
+	if err := syncDequeue(c); err != nil {
+		return err
+	}
+
+	// This is not our semaphore, so prepare it for the next user.
+	for i := 0; i < cap(taskSemaphore); i++ {
+		taskSemaphore.release()
+	}
+
+	// Delete empty directories, from the bottom of the tree up to,
+	// but not including, the inserted root.
+	//
+	// We need to do this at the end due to our recursive handling,
+	// as well as because of asynchronous file filtering.
+	stmt, err := c.tx.Prepare(`
+		WITH RECURSIVE subtree(id, parent, sha1, level) AS (
+			SELECT id, parent, sha1, 1 FROM node WHERE id = ?
+			UNION ALL
+			SELECT n.id, n.parent, n.sha1, s.level + 1
+			FROM node AS n JOIN subtree AS s ON n.parent = s.id
+		) DELETE FROM node WHERE id IN (
+			SELECT id FROM subtree WHERE level <> 1 AND sha1 IS NULL
+			AND id NOT IN (SELECT parent FROM node WHERE parent IS NOT NULL)
+		)`)
+	if err != nil {
+		return err
+	}
+
+	// Iterate to a fixed point: deleting leaf directories
+	// may expose new childless directories above them.
+	for {
+		if result, err := stmt.Exec(dbParent); err != nil {
+			return err
+		} else if n, err := result.RowsAffected(); err != nil {
+			return err
+		} else if n == 0 {
+			return nil
+		}
+	}
+}
+
+// syncPath is one root to synchronize: a filesystem directory
+// together with its intended location in the database node tree.
+type syncPath struct {
+	db []string // database path, in terms of nodes
+	fs string   // normalized filesystem path
+}
+
+// syncResolveRoots normalizes filesystem paths given in command line arguments,
+// and figures out a database path for each. Duplicates are skipped or rejected.
+func syncResolveRoots(args []string, fullpaths bool) (
+	roots []*syncPath, err error) {
+	for i := range args {
+		fs, err := filepath.Abs(filepath.Clean(args[i]))
+		if err != nil {
+			return nil, err
+		}
+
+		roots = append(roots,
+			&syncPath{decodeWebPath(filepath.ToSlash(fs)), fs})
+	}
+
+	if fullpaths {
+		// Filter out duplicates. In this case, they're just duplicated work.
+		slices.SortFunc(roots, func(a, b *syncPath) int {
+			return strings.Compare(a.fs, b.fs)
+		})
+		// Both exact duplicates and paths nested under a preceding root
+		// are collapsed, with a warning.
+		roots = slices.CompactFunc(roots, func(a, b *syncPath) bool {
+			if a.fs != b.fs && !strings.HasPrefix(b.fs, a.fs+"/") {
+				return false
+			}
+			log.Printf("asking to sync path twice: %s\n", b.fs)
+			return true
+		})
+	} else {
+		// Keep just the basenames.
+		for _, path := range roots {
+			if len(path.db) > 0 {
+				path.db = path.db[len(path.db)-1:]
+			}
+		}
+
+		// Different filesystem paths mapping to the same DB location
+		// are definitely a problem we would like to avoid,
+		// otherwise we don't care.
+		slices.SortFunc(roots, func(a, b *syncPath) int {
+			return slices.Compare(a.db, b.db)
+		})
+		for i := 1; i < len(roots); i++ {
+			if slices.Equal(roots[i-1].db, roots[i].db) {
+				return nil, fmt.Errorf("duplicate root: %v", roots[i].db)
+			}
+		}
+	}
+	return
+}
+
+// disposeCTE, completed by a main statement, provides these tables
+// for the subtree rooted at the bound node ID:
+//  - root: the subtree root, with its path built up from the tree top,
+//  - children: all nodes of the subtree, with paths and depth levels,
+//  - removed: the distinct images contained within the subtree,
+//  - orphaned: those of them that have no references outside the subtree.
+const disposeCTE = `WITH RECURSIVE
+	root(id, sha1, parent, path) AS (
+		SELECT id, sha1, parent, name FROM node WHERE id = ?
+		UNION ALL
+		SELECT r.id, r.sha1, n.parent, n.name || '/' || r.path
+		FROM node AS n JOIN root AS r ON n.id = r.parent
+	),
+	children(id, sha1, path, level) AS (
+		SELECT id, sha1, path, 1 FROM root WHERE parent IS NULL
+		UNION ALL
+		SELECT n.id, n.sha1, c.path || '/' || n.name, c.level + 1
+		FROM node AS n JOIN children AS c ON n.parent = c.id
+	),
+	removed(sha1, count, path) AS (
+		SELECT sha1, COUNT(*) AS count, MIN(path) AS path
+		FROM children
+		GROUP BY sha1
+	),
+	orphaned(sha1, path, count, total) AS (
+		SELECT r.sha1, r.path, r.count, COUNT(*) AS total
+		FROM removed AS r
+		JOIN node ON node.sha1 = r.sha1
+		GROUP BY node.sha1
+		HAVING count = total
+	)`
+
+// cmdSync ensures the given (sub)roots are accurately reflected
+// in the database.
+func cmdSync(fs *flag.FlagSet, args []string) error {
+	fullpaths := fs.Bool("fullpaths", false, "don't basename arguments")
+	if err := fs.Parse(args); err != nil {
+		return err
+	}
+	if fs.NArg() < 2 {
+		return errWrongUsage
+	}
+	if err := openDB(fs.Arg(0)); err != nil {
+		return err
+	}
+
+	roots, err := syncResolveRoots(fs.Args()[1:], *fullpaths)
+	if err != nil {
+		return err
+	}
+
+	// Make the whole run interruptible.
+	ctx, stop := signal.NotifyContext(context.Background(), os.Interrupt)
+	defer stop()
+
+	// In case of a failure during processing, the only retained side effects
+	// on the filesystem tree are:
+	// - Fixing dead symlinks to images.
+	// - Creating symlinks to images that aren't used by anything.
+	tx, err := db.BeginTx(ctx, nil)
+	if err != nil {
+		return err
+	}
+	defer tx.Rollback()
+
+	// Mild hack: upgrade the transaction to a write one straight away,
+	// in order to rule out deadlocks (preventable failure).
+	if _, err := tx.Exec(`END TRANSACTION;
+		BEGIN IMMEDIATE TRANSACTION`); err != nil {
+		return err
+	}
+
+	c := syncContext{ctx: ctx, tx: tx, pb: newProgressBar(-1),
+		linked: make(map[string]struct{})}
+	defer c.pb.Stop()
+
+	// Prepare the statements backing syncDispose() once for the whole run;
+	// they all build on the disposeCTE subqueries.
+	if c.stmtOrphan, err = c.tx.Prepare(disposeCTE + `
+		INSERT OR IGNORE INTO orphan(sha1, path)
+		SELECT sha1, path FROM orphaned`); err != nil {
+		return err
+	}
+	if c.stmtDisposeSub, err = c.tx.Prepare(disposeCTE + `
+		DELETE FROM node WHERE id
+		IN (SELECT DISTINCT id FROM children WHERE level <> 1)`); err != nil {
+		return err
+	}
+	if c.stmtDisposeAll, err = c.tx.Prepare(disposeCTE + `
+		DELETE FROM node WHERE id
+		IN (SELECT DISTINCT id FROM children)`); err != nil {
+		return err
+	}
+
+	// Info tasks take a position in the task semaphore channel,
+	// then fill the info channel.
+	//
+	// Immediately after syncDequeue(), the info channel is empty,
+	// but the semaphore might be full.
+	//
+	// By having at least one position in the info channel,
+	// we allow at least one info task to run to semaphore release,
+	// so that syncEnqueue() doesn't deadlock.
+	//
+	// By making it the same size as the semaphore,
+	// the end of this function doesn't need to dequeue while waiting.
+	// It also prevents goroutine leaks despite leaving them running--
+	// once they finish their job, they're gone,
+	// and eventually the info channel would get garbage collected.
+	//
+	// The additional slot is there to handle the one result
+	// that may be placed while syncEnqueue() waits for the semaphore,
+	// i.e., it is for the result of the task that syncEnqueue() spawns.
+	c.info = make(chan syncFileInfo, cap(taskSemaphore)+1)
+
+	for _, root := range roots {
+		if err := syncRoot(&c, root.db, root.fs); err != nil {
+			return err
+		}
+	}
+	return tx.Commit()
+}
+
+// --- Removal -----------------------------------------------------------------
+
+// cmdRemove is for manual removal of subtrees from the database.
+// Beware that inputs are database, not filesystem paths.
+func cmdRemove(fs *flag.FlagSet, args []string) error {
+	if err := fs.Parse(args); err != nil {
+		return err
+	}
+	if fs.NArg() < 2 {
+		return errWrongUsage
+	}
+	if err := openDB(fs.Arg(0)); err != nil {
+		return err
+	}
+
+	tx, err := db.BeginTx(context.Background(), nil)
+	if err != nil {
+		return err
+	}
+	defer tx.Rollback()
+
+	for _, path := range fs.Args()[1:] {
+		// Resolve the path to a node ID, component by component;
+		// the initial invalid NullInt64 matches a NULL parent (the root).
+		var id sql.NullInt64
+		for _, name := range decodeWebPath(path) {
+			if err := tx.QueryRow(`SELECT id FROM node
+				WHERE parent IS ? AND name = ?`,
+				id, name).Scan(&id); err != nil {
+				return err
+			}
+		}
+		if id.Int64 == 0 {
+			return errors.New("can't remove root")
+		}
+
+		// First record the orphans, then delete the subtree,
+		// including the named node itself.
+		if _, err = tx.Exec(disposeCTE+`
+			INSERT OR IGNORE INTO orphan(sha1, path)
+			SELECT sha1, path FROM orphaned`, id); err != nil {
+			return err
+		}
+		if _, err = tx.Exec(disposeCTE+`
+			DELETE FROM node WHERE id
+			IN (SELECT DISTINCT id FROM children)`, id); err != nil {
+			return err
+		}
+	}
+	return tx.Commit()
+}
+
+// --- Tagging -----------------------------------------------------------------
+
+// cmdTag mass imports tags from data passed on stdin as a TSV
+// of SHA1 TAG WEIGHT entries.
+//
+// Arguments: GD SPACE [DESCRIPTION]
+func cmdTag(fs *flag.FlagSet, args []string) error {
+	if err := fs.Parse(args); err != nil {
+		return err
+	}
+	if fs.NArg() < 2 || fs.NArg() > 3 {
+		return errWrongUsage
+	}
+	if err := openDB(fs.Arg(0)); err != nil {
+		return err
+	}
+
+	space := fs.Arg(1)
+
+	// The description is optional; NULL is stored when it is absent.
+	var description sql.NullString
+	if fs.NArg() >= 3 {
+		description = sql.NullString{String: fs.Arg(2), Valid: true}
+	}
+
+	// Note that starting as a write transaction prevents deadlocks.
+	// Imports are rare, and just bulk load data, so this scope is fine.
+	tx, err := db.Begin()
+	if err != nil {
+		return err
+	}
+	defer tx.Rollback()
+
+	if _, err := tx.Exec(`INSERT OR IGNORE INTO tag_space(name, description)
+		VALUES (?, ?)`, space, description); err != nil {
+		return err
+	}
+
+	var spaceID int64
+	if err := tx.QueryRow(`SELECT id FROM tag_space WHERE name = ?`,
+		space).Scan(&spaceID); err != nil {
+		return err
+	}
+
+	// XXX: It might make sense to pre-erase all tag assignments within
+	// the given space for that image, the first time we see it:
+	//
+	//    DELETE FROM tag_assignment
+	//    WHERE sha1 = ? AND tag IN (SELECT id FROM tag WHERE space = ?)
+	//
+	// or even just clear the tag space completely:
+	//
+	//    DELETE FROM tag_assignment
+	//    WHERE tag IN (SELECT id FROM tag WHERE space = ?);
+	//    DELETE FROM tag WHERE space = ?;
+	stmt, err := tx.Prepare(`INSERT INTO tag_assignment(sha1, tag, weight)
+		VALUES (?, (SELECT id FROM tag WHERE space = ? AND name = ?), ?)
+		ON CONFLICT DO UPDATE SET weight = ?`)
+	if err != nil {
+		return err
+	}
+	defer stmt.Close()
+
+	scanner := bufio.NewScanner(os.Stdin)
+	for scanner.Scan() {
+		fields := strings.Split(scanner.Text(), "\t")
+		if len(fields) != 3 {
+			return errors.New("invalid input format")
+		}
+
+		sha1, tag := fields[0], fields[1]
+		weight, err := strconv.ParseFloat(fields[2], 64)
+		if err != nil {
+			return err
+		}
+
+		// Make sure the tag exists. Note that this used to "return nil"
+		// on failure, silently cutting the import short while still
+		// committing whatever had been processed up to that point.
+		if _, err := tx.Exec(
+			`INSERT OR IGNORE INTO tag(space, name) VALUES (?, ?);`,
+			spaceID, tag); err != nil {
+			return err
+		}
+		// Assignment failures (e.g., an unknown SHA1) only produce
+		// per-line warnings, so one bad line doesn't abort a bulk import.
+		if _, err := stmt.Exec(sha1, spaceID, tag, weight, weight); err != nil {
+			log.Printf("%s: %s\n", sha1, err)
+		}
+	}
+	if err := scanner.Err(); err != nil {
+		return err
+	}
+	return tx.Commit()
+}
+
+// --- Check -------------------------------------------------------------------
+
+// isValidSHA1 checks that the given string is a lowercase
+// hexadecimal SHA-1 digest.
+func isValidSHA1(hash string) bool {
+	if len(hash) != sha1.Size*2 || strings.ToLower(hash) != hash {
+		return false
+	}
+	if _, err := hex.DecodeString(hash); err != nil {
+		return false
+	}
+	return true
+}
+
+// hashesToFileListing converts content hashes to a sorted, deduplicated
+// list of the filesystem paths where they should reside,
+// including their parent directories and the root itself.
+func hashesToFileListing(root, suffix string, hashes []string) []string {
+	// Note that we're semi-duplicating {image,thumb}Path().
+	paths := []string{root}
+	for _, hash := range hashes {
+		dir := filepath.Join(root, hash[:2])
+		paths = append(paths, dir, filepath.Join(dir, hash+suffix))
+	}
+	slices.Sort(paths)
+	return slices.Compact(paths)
+}
+
+// collectFileListing returns a sorted listing of all paths under root,
+// including root itself.
+func collectFileListing(root string) (paths []string, err error) {
+	err = filepath.WalkDir(root,
+		func(path string, d fs.DirEntry, err error) error {
+			paths = append(paths, path)
+			return err
+		})
+
+	// Even though it should already be sorted somehow.
+	slices.Sort(paths)
+	return
+}
+
+// checkFiles compares the file listing derived from the database hashes
+// against the actual filesystem contents under root, printing differences.
+// It returns whether the listings fully agree, plus their intersection.
+func checkFiles(root, suffix string, hashes []string) (bool, []string, error) {
+	db := hashesToFileListing(root, suffix, hashes)
+	fs, err := collectFileListing(root)
+	if err != nil {
+		return false, nil, err
+	}
+
+	// Merge-compare the two sorted listings.
+	iDB, iFS, ok, intersection := 0, 0, true, []string{}
+	for iDB < len(db) && iFS < len(fs) {
+		if db[iDB] == fs[iFS] {
+			intersection = append(intersection, db[iDB])
+			iDB++
+			iFS++
+		} else if db[iDB] < fs[iFS] {
+			ok = false
+			fmt.Printf("only in DB: %s\n", db[iDB])
+			iDB++
+		} else {
+			ok = false
+			fmt.Printf("only in FS: %s\n", fs[iFS])
+			iFS++
+		}
+	}
+	for _, path := range db[iDB:] {
+		ok = false
+		fmt.Printf("only in DB: %s\n", path)
+	}
+	for _, path := range fs[iFS:] {
+		ok = false
+		fmt.Printf("only in FS: %s\n", path)
+	}
+	return ok, intersection, nil
+}
+
+// checkHash verifies that the file at path has contents matching its
+// basename, which is taken to be the expected SHA-1. File-level problems
+// are reported through the message, rather than through the error.
+func checkHash(path string) (message string, err error) {
+	f, err := os.Open(path)
+	if err != nil {
+		return err.Error(), nil
+	}
+	defer f.Close()
+
+	// We get 2 levels of parent directories in here, just filter them out.
+	if fi, err := f.Stat(); err != nil {
+		return err.Error(), nil
+	} else if fi.IsDir() {
+		return "", nil
+	}
+
+	hash := sha1.New()
+	_, err = io.CopyBuffer(hash, f, make([]byte, 65536))
+	if err != nil {
+		return err.Error(), nil
+	}
+
+	// This shadows the crypto/sha1 package name, no longer needed here.
+	sha1 := hex.EncodeToString(hash.Sum(nil))
+	if sha1 != filepath.Base(path) {
+		return fmt.Sprintf("mismatch, found %s", sha1), nil
+	}
+	return "", nil
+}
+
+// checkHashes runs checkHash on all paths in parallel,
+// returning false if any of them failed verification.
+func checkHashes(paths []string) (bool, error) {
+	log.Println("checking image hashes")
+	var failed atomic.Bool
+	err := parallelize(paths, func(path string) (string, error) {
+		message, err := checkHash(path)
+		if message != "" {
+			failed.Store(true)
+		}
+		return message, err
+	})
+	return !failed.Load(), err
+}
+
+// cmdCheck carries out various database consistency checks.
+func cmdCheck(fs *flag.FlagSet, args []string) error {
+	full := fs.Bool("full", false, "verify image hashes")
+	if err := fs.Parse(args); err != nil {
+		return err
+	}
+	if fs.NArg() != 1 {
+		return errWrongUsage
+	}
+	if err := openDB(fs.Arg(0)); err != nil {
+		return err
+	}
+
+	// Check if hashes are in the right format.
+	log.Println("checking image hashes")
+
+	allSHA1, err := dbCollectStrings(`SELECT sha1 FROM image`)
+	if err != nil {
+		return err
+	}
+
+	ok := true
+	for _, hash := range allSHA1 {
+		if !isValidSHA1(hash) {
+			ok = false
+			fmt.Printf("invalid image SHA1: %s\n", hash)
+		}
+	}
+
+	// This is, rather obviously, just a strict subset.
+	// Although it doesn't run in the same transaction.
+	thumbSHA1, err := dbCollectStrings(`SELECT sha1 FROM image
+		WHERE thumbw IS NOT NULL OR thumbh IS NOT NULL`)
+	if err != nil {
+		return err
+	}
+
+	// This somewhat duplicates {image,thumb}Path().
+	log.Println("checking SQL against filesystem")
+	okImages, intersection, err := checkFiles(
+		filepath.Join(galleryDirectory, nameOfImageRoot), "", allSHA1)
+	if err != nil {
+		return err
+	}
+
+	okThumbs, _, err := checkFiles(
+		filepath.Join(galleryDirectory, nameOfThumbRoot), ".webp", thumbSHA1)
+	if err != nil {
+		return err
+	}
+	if !okImages || !okThumbs {
+		ok = false
+	}
+
+	// os.Stat() follows symlinks, so this catches dangling ones.
+	log.Println("checking for dead symlinks")
+	for _, path := range intersection {
+		if _, err := os.Stat(path); err != nil {
+			ok = false
+			fmt.Printf("%s: %s\n", path, err)
+		}
+	}
+
+	if *full {
+		if ok2, err := checkHashes(intersection); err != nil {
+			return err
+		} else if !ok2 {
+			ok = false
+		}
+	}
+
+	if !ok {
+		return errors.New("detected inconsistencies")
+	}
+	return nil
+}
+
+// --- Thumbnailing ------------------------------------------------------------
+
+// identifyThumbnail returns the dimensions of the WebP thumbnail at path.
+func identifyThumbnail(path string) (w, h int, err error) {
+	f, err := os.Open(path)
+	if err != nil {
+		return
+	}
+	defer f.Close()
+
+	config, err := webp.DecodeConfig(f)
+	if err != nil {
+		return
+	}
+	return config.Width, config.Height, nil
+}
+
+// makeThumbnail generates a normalized WebP thumbnail at pathThumb
+// for the image at pathImage, and returns the thumbnail's dimensions.
+// When "load" is set, an existing, readable thumbnail file is reused.
+func makeThumbnail(load bool, pathImage, pathThumb string) (
+	w, h int, err error) {
+	if load {
+		if w, h, err = identifyThumbnail(pathThumb); err == nil {
+			return
+		}
+	}
+
+	thumbDirname, _ := filepath.Split(pathThumb)
+	if err := os.MkdirAll(thumbDirname, 0755); err != nil {
+		return 0, 0, err
+	}
+
+	// Create a normalized thumbnail. Since we don't particularly need
+	// any complex processing, such as surrounding of metadata,
+	// simply push it through ImageMagick.
+	//
+	// - http://www.ericbrasseur.org/gamma.html
+	// - https://www.imagemagick.org/Usage/thumbnails/
+	// - https://imagemagick.org/script/command-line-options.php#layers
+	//
+	// "info:" output is written for each frame, which is why we delete
+	// all of them but the first one beforehand.
+	//
+	// TODO: See if we can optimize resulting WebP animations.
+	// (Do -layers optimize* apply to this format at all?)
+	cmd := exec.Command("magick", "-limit", "thread", "1", pathImage,
+		"-coalesce", "-colorspace", "RGB", "-auto-orient", "-strip",
+		"-resize", "256x128>", "-colorspace", "sRGB",
+		"-format", "%w %h", "+write", pathThumb, "-delete", "1--1", "info:")
+
+	out, err := cmd.Output()
+	if err != nil {
+		return 0, 0, err
+	}
+
+	_, err = fmt.Fscanf(bytes.NewReader(out), "%d %d", &w, &h)
+	return w, h, err
+}
+
+// cmdThumbnail generates missing thumbnails, in parallel.
+func cmdThumbnail(fs *flag.FlagSet, args []string) error {
+	load := fs.Bool("load", false, "try to load existing thumbnail files")
+	if err := fs.Parse(args); err != nil {
+		return err
+	}
+	if fs.NArg() < 1 {
+		return errWrongUsage
+	}
+	if err := openDB(fs.Arg(0)); err != nil {
+		return err
+	}
+
+	hexSHA1 := fs.Args()[1:]
+	if len(hexSHA1) == 0 {
+		// Get all unique images in the database with no thumbnail.
+		var err error
+		hexSHA1, err = dbCollectStrings(`SELECT sha1 FROM image
+			WHERE thumbw IS NULL OR thumbh IS NULL`)
+		if err != nil {
+			return err
+		}
+	}
+
+	stmt, err := db.Prepare(
+		`UPDATE image SET thumbw = ?, thumbh = ? WHERE sha1 = ?`)
+	if err != nil {
+		return err
+	}
+	defer stmt.Close()
+
+	// The mutex serializes writes through the shared prepared statement.
+	var mu sync.Mutex
+	return parallelize(hexSHA1, func(sha1 string) (message string, err error) {
+		pathImage := imagePath(sha1)
+		pathThumb := thumbPath(sha1)
+		w, h, err := makeThumbnail(*load, pathImage, pathThumb)
+		if err != nil {
+			// Pass ImageMagick's diagnostics on as a warning message.
+			if ee, ok := err.(*exec.ExitError); ok {
+				return string(ee.Stderr), nil
+			}
+			return "", err
+		}
+
+		mu.Lock()
+		defer mu.Unlock()
+		_, err = stmt.Exec(w, h, sha1)
+		return "", err
+	})
+}
+
+// --- Perceptual hash ---------------------------------------------------------
+
+// linearImage wraps an image.Image so that pixels read through it come out
+// in linear RGB rather than sRGB, for gamma-correct downscaling
+// (see http://www.ericbrasseur.org/gamma.html).
+type linearImage struct {
+	img image.Image
+}
+
+// newLinearImage wraps the given (presumably sRGB) image.
+func newLinearImage(img image.Image) *linearImage {
+	return &linearImage{img: img}
+}
+
+func (l *linearImage) ColorModel() color.Model { return l.img.ColorModel() }
+func (l *linearImage) Bounds() image.Rectangle { return l.img.Bounds() }
+
+// unSRGB converts a 16-bit sRGB channel value to an 8-bit linear RGB value,
+// applying the standard piecewise sRGB decoding function.
+func unSRGB(c uint32) uint8 {
+	n := float64(c) / 0xffff
+	if n <= 0.04045 {
+		return uint8(n * (255.0 / 12.92))
+	}
+	return uint8(math.Pow((n+0.055)/(1.055), 2.4) * 255.0)
+}
+
+// At implements image.Image, linearizing each color channel of the wrapped
+// image; alpha is passed through (truncated to 8 bits).
+func (l *linearImage) At(x, y int) color.Color {
+	r, g, b, a := l.img.At(x, y).RGBA()
+	return color.RGBA{
+		R: unSRGB(r), G: unSRGB(g), B: unSRGB(b), A: uint8(a >> 8)}
+}
+
+// - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
+
+// isWebPAnimation returns whether the given ReadSeeker starts a WebP animation.
+// See https://developers.google.com/speed/webp/docs/riff_container
+func isWebPAnimation(rs io.ReadSeeker) (bool, error) {
+ b := make([]byte, 21)
+ if _, err := rs.Read(b); err != nil {
+ return false, err
+ }
+ if _, err := rs.Seek(0, io.SeekStart); err != nil {
+ return false, err
+ }
+
+ return bytes.Equal(b[:4], []byte("RIFF")) &&
+ bytes.Equal(b[8:16], []byte("WEBPVP8X")) &&
+ b[20]&0b00000010 != 0, nil
+}
+
+// errIsAnimation is returned by dhashWebP for animated WebP files,
+// which have no single frame to hash.
+var errIsAnimation = errors.New("cannot perceptually hash animations")
+
+// dhashWebP computes a 64-bit difference hash of a still WebP image:
+// it is downscaled to 9x8 in linear RGB, converted to grey, and each of
+// the 64 bits records whether a pixel is darker than its right neighbour.
+// Similar images yield hashes with a small Hamming distance.
+func dhashWebP(rs io.ReadSeeker) (uint64, error) {
+	if a, err := isWebPAnimation(rs); err != nil {
+		return 0, err
+	} else if a {
+		return 0, errIsAnimation
+	}
+
+	// Doing this entire thing in Go is SLOW, but convenient.
+	source, err := webp.Decode(rs)
+	if err != nil {
+		return 0, err
+	}
+
+	var (
+		linear = newLinearImage(source)
+		resized = image.NewNRGBA64(image.Rect(0, 0, 9, 8))
+	)
+	draw.CatmullRom.Scale(resized, resized.Bounds(),
+		linear, linear.Bounds(), draw.Src, nil)
+
+	var hash uint64
+	for y := 0; y < 8; y++ {
+		// BT.709 luma coefficients; NRGBA64 values are not premultiplied.
+		var grey [9]float32
+		for x := 0; x < 9; x++ {
+			rgba := resized.NRGBA64At(x, y)
+			grey[x] = 0.2126*float32(rgba.R) +
+				0.7152*float32(rgba.G) +
+				0.0722*float32(rgba.B)
+		}
+
+		// One bit per horizontally adjacent pair, most significant first.
+		var row uint64
+		if grey[0] < grey[1] {
+			row |= 1 << 7
+		}
+		if grey[1] < grey[2] {
+			row |= 1 << 6
+		}
+		if grey[2] < grey[3] {
+			row |= 1 << 5
+		}
+		if grey[3] < grey[4] {
+			row |= 1 << 4
+		}
+		if grey[4] < grey[5] {
+			row |= 1 << 3
+		}
+		if grey[5] < grey[6] {
+			row |= 1 << 2
+		}
+		if grey[6] < grey[7] {
+			row |= 1 << 1
+		}
+		if grey[7] < grey[8] {
+			row |= 1 << 0
+		}
+		hash = hash<<8 | row
+	}
+	return hash, nil
+}
+
+// - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
+
+// makeDhash opens the thumbnail for the image with the given SHA-1
+// and computes its perceptual difference hash.
+func makeDhash(sha1 string) (uint64, error) {
+	pathThumb := thumbPath(sha1)
+	f, err := os.Open(pathThumb)
+	if err != nil {
+		return 0, err
+	}
+	defer f.Close()
+	return dhashWebP(f)
+}
+
+// cmdDhash computes perceptual hashes from thumbnails.
+// With no SHA-1 arguments, it processes every thumbnailed image
+// that does not have a hash yet.
+func cmdDhash(fs *flag.FlagSet, args []string) error {
+	if err := fs.Parse(args); err != nil {
+		return err
+	}
+	if fs.NArg() < 1 {
+		return errWrongUsage
+	}
+	if err := openDB(fs.Arg(0)); err != nil {
+		return err
+	}
+
+	hexSHA1 := fs.Args()[1:]
+	if len(hexSHA1) == 0 {
+		var err error
+		hexSHA1, err = dbCollectStrings(`SELECT sha1 FROM image
+			WHERE thumbw IS NOT NULL AND thumbh IS NOT NULL AND dhash IS NULL`)
+		if err != nil {
+			return err
+		}
+	}
+
+	stmt, err := db.Prepare(`UPDATE image SET dhash = ? WHERE sha1 = ?`)
+	if err != nil {
+		return err
+	}
+	defer stmt.Close()
+
+	// Serialize database updates coming from the worker goroutines.
+	var mu sync.Mutex
+	return parallelize(hexSHA1, func(sha1 string) (message string, err error) {
+		hash, err := makeDhash(sha1)
+		if errors.Is(err, errIsAnimation) {
+			// Ignoring this common condition.
+			return "", nil
+		} else if err != nil {
+			// Decoding problems become per-image warnings,
+			// not batch-stopping errors.
+			return err.Error(), nil
+		}
+
+		mu.Lock()
+		defer mu.Unlock()
+		_, err = stmt.Exec(int64(hash), sha1)
+		return "", err
+	})
+}
+
+// --- Main --------------------------------------------------------------------
+
+// errWrongUsage makes main print the failing subcommand's usage
+// and exit with status 2.
+var errWrongUsage = errors.New("wrong usage")
+
+// commands maps subcommand names to their handlers and help strings;
+// GD in the usage strings is the argument passed to openDB.
+var commands = map[string]struct {
+	handler  func(*flag.FlagSet, []string) error
+	usage    string
+	function string
+}{
+	"init":      {cmdInit, "GD", "Initialize a database."},
+	"web":       {cmdWeb, "GD ADDRESS", "Launch a web interface."},
+	"tag":       {cmdTag, "GD SPACE [DESCRIPTION]", "Import tags."},
+	"sync":      {cmdSync, "GD ROOT...", "Synchronise with the filesystem."},
+	"remove":    {cmdRemove, "GD PATH...", "Remove database subtrees."},
+	"check":     {cmdCheck, "GD", "Run consistency checks."},
+	"thumbnail": {cmdThumbnail, "GD [SHA1...]", "Generate thumbnails."},
+	"dhash":     {cmdDhash, "GD [SHA1...]", "Compute perceptual hashes."},
+}
+
+// usage prints an overview of all subcommands to flag's configured output.
+func usage() {
+	f := flag.CommandLine.Output()
+	fmt.Fprintf(f, "Usage: %s COMMAND [ARG...]\n", os.Args[0])
+	flag.PrintDefaults()
+
+	// The alphabetic ordering is unfortunate, but tolerable.
+	keys := []string{}
+	for key := range commands {
+		keys = append(keys, key)
+	}
+	sort.Strings(keys)
+
+	fmt.Fprintf(f, "\nCommands:\n")
+	for _, key := range keys {
+		fmt.Fprintf(f, "  %s [OPTION...] %s\n    \t%s\n",
+			key, commands[key].usage, commands[key].function)
+	}
+}
+
+// main dispatches to one of the subcommands in the commands map,
+// giving each its own flag set.
+func main() {
+	// This implements the -h switch for us by default.
+	// The rest of the handling here closely follows what flag does internally.
+	flag.Usage = usage
+	flag.Parse()
+	if flag.NArg() < 1 {
+		flag.Usage()
+		os.Exit(2)
+	}
+
+	cmd, ok := commands[flag.Arg(0)]
+	if !ok {
+		fmt.Fprintf(flag.CommandLine.Output(),
+			"unknown command: %s\n", flag.Arg(0))
+		flag.Usage()
+		os.Exit(2)
+	}
+
+	fs := flag.NewFlagSet(flag.Arg(0), flag.ExitOnError)
+	fs.Usage = func() {
+		fmt.Fprintf(fs.Output(),
+			"Usage: %s [OPTION...] %s\n%s\n",
+			fs.Name(), cmd.usage, cmd.function)
+		fs.PrintDefaults()
+	}
+
+	// Cap parallel work at the CPU count, for parallelize().
+	taskSemaphore = newSemaphore(runtime.NumCPU())
+	err := cmd.handler(fs, flag.Args()[1:])
+
+	// Note that the database object has a closing finalizer,
+	// we just additionally print any errors coming from there.
+	if db != nil {
+		if err := db.Close(); err != nil {
+			log.Println(err)
+		}
+	}
+
+	if errors.Is(err, errWrongUsage) {
+		fs.Usage()
+		os.Exit(2)
+	} else if err != nil {
+		log.Fatalln(err)
+	}
+}
diff --git a/public/gallery.js b/public/gallery.js
new file mode 100644
index 0000000..9d3b067
--- /dev/null
+++ b/public/gallery.js
@@ -0,0 +1,675 @@
+'use strict'
+
+let callActive = false
+let callFaulty = false
+
+// call invokes the named server API method with the given parameters,
+// returning a Promise for its result, and keeps the Header activity
+// indicator counters up to date.
+function call(method, params) {
+	// XXX: At least with POST, unsuccessful requests result
+	// in caught errors containing Errors with a null message.
+	// This is an issue within XMLHttpRequest.
+	callActive++
+	return m.request({
+		method: "POST",
+		url: `/api/${method}`,
+		body: params,
+	}).then(result => {
+		callActive--
+		callFaulty = false
+		return result
+	}).catch(error => {
+		callActive--
+		callFaulty = true
+		throw error
+	})
+}
+
+// Lazy-load thumbnails, except when developing against localhost.
+const loading = (window.location.hostname !== 'localhost') ? 'lazy' : undefined
+
+// Header is the top tab bar: global navigation, context-dependent tabs,
+// and an indicator of API call activity.
+let Header = {
+	// Tabs shown on every page.
+	global: [
+		{name: "Browse", route: '/browse'},
+		{name: "Tags", route: '/tags'},
+		{name: "Duplicates", route: '/duplicates'},
+		{name: "Orphans", route: '/orphans'},
+	],
+
+	// Tabs shown while a particular image is selected (/view, /similar).
+	image: [
+		{
+			route: '/view',
+			render: () => m(m.route.Link, {
+				href: `/view/:key`,
+				params: {key: m.route.param('key')},
+				class: m.route.get().startsWith('/view')
+					? 'active' : undefined,
+			}, "View"),
+		},
+		{
+			route: '/similar',
+			render: () => m(m.route.Link, {
+				href: `/similar/:key`,
+				params: {key: m.route.param('key')},
+				class: m.route.get().startsWith('/similar')
+					? 'active' : undefined,
+			}, "Similar"),
+		},
+	],
+
+	// Tabs shown on search result pages.
+	search: [
+		{
+			route: '/search',
+			render: () => m(m.route.Link, {
+				href: `/search/:key`,
+				params: {key: m.route.param('key')},
+				class: m.route.get().startsWith('/search')
+					? 'active' : undefined,
+			}, "Search"),
+		},
+	],
+
+	view(vnode) {
+		const route = m.route.get()
+		const main = this.global.map(x =>
+			m(m.route.Link, {
+				href: x.route,
+				class: route.startsWith(x.route) ? 'active' : undefined,
+			}, x.name))
+
+		// Pick the contextual tab group matching the current route, if any.
+		let context
+		if (this.image.some(x => route.startsWith(x.route)))
+			context = this.image.map(x => x.render())
+		if (this.search.some(x => route.startsWith(x.route)))
+			context = this.search.map(x => x.render())
+
+		// Filled dot: busy or failed (red); hollow dot: idle.
+		return m('.header', {}, [
+			m('nav', main),
+			m('nav', context),
+			callFaulty
+				? m('.activity.error[title=Error]', '●')
+				: callActive
+				? m('.activity[title=Busy]', '●')
+				: m('.activity[title=Idle]', '○'),
+		])
+	},
+}
+
+// Thumbnail renders an image's thumbnail, or a "missing" placeholder
+// when the entry has no thumbnail dimensions. Other attributes are
+// forwarded to the resulting element.
+let Thumbnail = {
+	view(vnode) {
+		const e = vnode.attrs.info
+		if (!e.thumbW || !e.thumbH)
+			return m('.thumbnail.missing', {...vnode.attrs, info: null})
+		return m('img.thumbnail', {...vnode.attrs, info: null,
+			src: `/thumb/${e.sha1}`, width: e.thumbW, height: e.thumbH,
+			loading})
+	},
+}
+
+// ScoredTag renders a single tag with its score shown as a meter,
+// linking to a search for that tag.
+let ScoredTag = {
+	view(vnode) {
+		const {space, tagname, score} = vnode.attrs
+		return m('li', [
+			m("meter[max=1.0]", {value: score, title: score}, score),
+			` `,
+			m(m.route.Link, {
+				href: `/search/:key`,
+				params: {key: `${space}:${tagname}`},
+			}, ` ${tagname}`),
+		])
+	},
+}
+
+// - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
+
+// BrowseModel holds the contents of the currently browsed directory.
+let BrowseModel = {
+	path: undefined,
+	subdirectories: [],
+	entries: [],
+	// Numeric collation, so that numbered filenames sort naturally.
+	collator: new Intl.Collator(undefined, {numeric: true}),
+
+	// reload fetches the given directory's listing,
+	// clearing state first when the directory changed.
+	async reload(path) {
+		if (this.path !== path) {
+			this.path = path
+			this.subdirectories = []
+			this.entries = []
+		}
+
+		let resp = await call('browse', {path})
+		this.subdirectories = resp.subdirectories
+		this.entries = resp.entries.sort((a, b) =>
+			this.collator.compare(a.name, b.name))
+	},
+
+	// joinPath joins two path components, tolerating empty ones.
+	joinPath(parent, child) {
+		if (!parent)
+			return child
+		if (!child)
+			return parent
+		return `${parent}/${child}`
+	},
+
+	// getBrowseLinks builds the sidebar tree: ancestors including Root
+	// (level -1), the current directory (level 0),
+	// then its subdirectories (level +1).
+	getBrowseLinks() {
+		if (this.path === undefined)
+			return []
+
+		let links = [{name: "Root", path: "", level: -1}], path
+		for (const crumb of this.path.split('/').filter(s => !!s)) {
+			path = this.joinPath(path, crumb)
+			links.push({name: crumb, path: path, level: -1})
+		}
+
+		links[links.length - 1].level = 0
+
+		for (const sub of this.subdirectories) {
+			links.push(
+				{name: sub, path: this.joinPath(this.path, sub), level: +1})
+		}
+		return links
+	},
+}
+
+// BrowseBarLink renders one sidebar directory link,
+// styled according to its level relative to the current directory.
+let BrowseBarLink = {
+	view(vnode) {
+		const link = vnode.attrs.link
+
+		let c = 'selected'
+		if (link.level < 0)
+			c = 'parent'
+		if (link.level > 0)
+			c = 'child'
+
+		return m('li', {
+			class: c,
+		}, m(m.route.Link, {
+			href: `/browse/:key`,
+			params: {key: link.path},
+		}, link.name))
+	},
+}
+
+// BrowseView renders the thumbnail grid for the current directory.
+let BrowseView = {
+	// So that Page Up/Down, etc., work after changing directories.
+	// Programmatically focusing a scrollable element requires setting tabindex,
+	// and causes :focus-visible on page load, which we suppress in CSS.
+	// I wish there was another way, but the workaround isn't particularly bad.
+	// focus({focusVisible: true}) is FF 104+ only and experimental.
+	oncreate(vnode) { vnode.dom.focus() },
+
+	view(vnode) {
+		return m('.browser[tabindex=0]', {
+			// Trying to force the oncreate on path changes.
+			key: BrowseModel.path,
+		}, BrowseModel.entries.map(info => {
+			return m(m.route.Link, {href: `/view/${info.sha1}`},
+				m(Thumbnail, {info, title: info.name}))
+		}))
+	},
+}
+
+// Browse is the /browse route: a directory-tree sidebar
+// next to the thumbnail grid.
+let Browse = {
+	// Reload the model immediately, to improve responsivity.
+	// But we don't need to: https://mithril.js.org/route.html#preloading-data
+	// Also see: https://mithril.js.org/route.html#route-cancellation--blocking
+	oninit(vnode) {
+		let path = vnode.attrs.key || ""
+		BrowseModel.reload(path)
+	},
+
+	view(vnode) {
+		return m('.container', {}, [
+			m(Header),
+			m('.body', {}, [
+				m('.sidebar', [
+					m('ul.path', BrowseModel.getBrowseLinks()
+						.map(link => m(BrowseBarLink, {link}))),
+				]),
+				m(BrowseView),
+			]),
+		])
+	},
+}
+
+// - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
+
+// TagsModel holds tag namespaces, optionally filtered to a single one.
+let TagsModel = {
+	ns: null,
+	namespaces: {},
+
+	// reload fetches namespaces, clearing state first
+	// when the namespace filter changed.
+	async reload(ns) {
+		if (this.ns !== ns) {
+			this.ns = ns
+			this.namespaces = {}
+		}
+
+		this.namespaces = await call('tags', {namespace: ns})
+	},
+}
+
+let TagsList = {
+ view(vnode) {
+ // TODO: Make it possible to sort by count.
+ const tags = Object.entries(vnode.attrs.tags)
+ .sort(([a, b]) => a[0].localeCompare(b[0]))
+
+ return (tags.length == 0)
+ ? "No tags"
+ : m("ul", tags.map(([name, count]) => m("li", [
+ m(m.route.Link, {
+ href: `/search/:key`,
+ params: {key: `${vnode.attrs.space}:${name}`},
+ }, ` ${name}`),
+ ` ×${count}`,
+ ])))
+ },
+}
+
+// TagsView renders all tag namespaces with their descriptions and tags.
+let TagsView = {
+	// See BrowseView.
+	oncreate(vnode) { vnode.dom.focus() },
+
+	view(vnode) {
+		// XXX: The empty-named tag namespace gets a bit shafted,
+		// in particular in the router, as well as with its header.
+		// Maybe we could refer to it by its numeric ID in routing.
+		const names = Object.keys(TagsModel.namespaces)
+			.sort((a, b) => a.localeCompare(b))
+
+		let children = (names.length == 0)
+			? "No namespaces"
+			: names.map(space => {
+				const ns = TagsModel.namespaces[space]
+				return [
+					m("h2", space),
+					ns.description ? m("p", ns.description) : [],
+					m(TagsList, {space, tags: ns.tags}),
+				]
+			})
+		return m('.tags[tabindex=0]', {}, children)
+	},
+}
+
+// Tags is the /tags route, optionally scoped to one namespace by key.
+let Tags = {
+	oninit(vnode) {
+		let ns = vnode.attrs.key
+		TagsModel.reload(ns)
+	},
+
+	view(vnode) {
+		return m('.container', {}, [
+			m(Header),
+			m('.body', {}, m(TagsView)),
+		])
+	},
+}
+
+// - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
+
+// DuplicatesModel holds groups of perceptually similar images.
+let DuplicatesModel = {
+	entries: [],
+
+	async reload() {
+		this.entries = await call('duplicates', {})
+	},
+}
+
+// DuplicatesThumbnail renders one duplicate linking to its Similar page,
+// with an occurrence count when it appears more than once.
+let DuplicatesThumbnail = {
+	view(vnode) {
+		const info = vnode.attrs.info
+		return [
+			m(m.route.Link, {href: `/similar/${info.sha1}`},
+				m(Thumbnail, {info})),
+			(info.occurences != 1) ? ` ×${info.occurences}` : [],
+		]
+	},
+}
+
+// DuplicatesList renders each duplicate group as a row of thumbnails.
+let DuplicatesList = {
+	// See BrowseView.
+	oncreate(vnode) { vnode.dom.focus() },
+
+	view(vnode) {
+		let children = (DuplicatesModel.entries.length == 0)
+			? "No duplicates"
+			: DuplicatesModel.entries.map(group =>
+				m('.row', group.map(entry =>
+					m(DuplicatesThumbnail, {info: entry}))))
+		return m('.duplicates[tabindex=0]', {}, children)
+	},
+}
+
+// Duplicates is the /duplicates route.
+let Duplicates = {
+	oninit(vnode) {
+		DuplicatesModel.reload()
+	},
+
+	view(vnode) {
+		return m('.container', {}, [
+			m(Header),
+			m('.body', {}, m(DuplicatesList)),
+		])
+	},
+}
+
+// - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
+
+// OrphansModel holds images no longer present in any directory.
+let OrphansModel = {
+	entries: [],
+
+	async reload() {
+		this.entries = await call('orphans', {})
+	},
+}
+
+// OrphansReplacement renders the suggested replacement image,
+// if the server found one.
+let OrphansReplacement = {
+	view(vnode) {
+		const info = vnode.attrs.info
+		if (!info)
+			return []
+
+		return [
+			` → `,
+			m(m.route.Link, {href: `/view/${info.sha1}`},
+				m(Thumbnail, {info})),
+			`${info.tags} tags`,
+		]
+	},
+}
+
+// OrphansRow renders one orphan with its tag count
+// and possible replacement.
+let OrphansRow = {
+	view(vnode) {
+		const info = vnode.attrs.info
+		return m('.row', [
+			// It might not load, but still allow tag viewing.
+			m(m.route.Link, {href: `/view/${info.sha1}`},
+				m(Thumbnail, {info})),
+			`${info.tags} tags`,
+			m(OrphansReplacement, {info: info.replacement}),
+		])
+	},
+}
+
+// OrphansList renders all orphans, headed by their last known path.
+let OrphansList = {
+	// See BrowseView.
+	oncreate(vnode) { vnode.dom.focus() },
+
+	view(vnode) {
+		let children = (OrphansModel.entries.length == 0)
+			? "No orphans"
+			: OrphansModel.entries.map(info => [
+				m("h2", info.lastPath),
+				m(OrphansRow, {info}),
+			])
+		return m('.orphans[tabindex=0]', {}, children)
+	},
+}
+
+// Orphans is the /orphans route.
+let Orphans = {
+	oninit(vnode) {
+		OrphansModel.reload()
+	},
+
+	view(vnode) {
+		return m('.container', {}, [
+			m(Header),
+			m('.body', {}, m(OrphansList)),
+		])
+	},
+}
+
+// - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
+
+// ViewModel holds details of the currently viewed image.
+let ViewModel = {
+	sha1: undefined,
+	width: 0,
+	height: 0,
+	paths: [],
+	tags: {},
+
+	// reload fetches image metadata, clearing state first
+	// when the image changed.
+	async reload(sha1) {
+		if (this.sha1 !== sha1) {
+			this.sha1 = sha1
+			this.width = this.height = 0
+			this.paths = []
+			this.tags = {}
+		}
+
+		let resp = await call('info', {sha1: sha1})
+		this.width = resp.width
+		this.height = resp.height
+		this.paths = resp.paths
+		this.tags = resp.tags
+	},
+}
+
+// ViewBarBrowseLink renders a link to browsing the given directory.
+let ViewBarBrowseLink = {
+	view(vnode) {
+		return m(m.route.Link, {
+			href: `/browse/:key`,
+			params: {key: vnode.attrs.path},
+		}, vnode.attrs.name)
+	},
+}
+
+// ViewBarPath renders a file path as breadcrumb links
+// to its parent directories, ending with the plain basename.
+let ViewBarPath = {
+	view(vnode) {
+		const parents = vnode.attrs.path.split('/')
+		const basename = parents.pop()
+
+		let result = [], path
+		if (!parents.length)
+			result.push(m(ViewBarBrowseLink, {path: "", name: "Root"}), "/")
+		for (const crumb of parents) {
+			path = BrowseModel.joinPath(path, crumb)
+			result.push(m(ViewBarBrowseLink, {path, name: crumb}), "/")
+		}
+		result.push(basename)
+		return result
+	},
+}
+
+// ViewBar renders the right-hand sidebar: the image's locations
+// and its tags, grouped by namespace and sorted by score.
+let ViewBar = {
+	view(vnode) {
+		return m('.viewbar', [
+			m('h2', "Locations"),
+			m('ul', ViewModel.paths.map(path =>
+				m('li', m(ViewBarPath, {path})))),
+			m('h2', "Tags"),
+			Object.entries(ViewModel.tags).map(([space, tags]) => [
+				m("h3", m(m.route.Link, {href: `/tags/${space}`}, space)),
+				m("ul.tags", Object.entries(tags)
+					.sort(([t1, w1], [t2, w2]) => (w2 - w1))
+					.map(([tag, score]) =>
+						m(ScoredTag, {space, tagname: tag, score}))),
+			]),
+		])
+	},
+}
+
+// View is the /view route: the full image next to its metadata.
+let View = {
+	oninit(vnode) {
+		let sha1 = vnode.attrs.key || ""
+		ViewModel.reload(sha1)
+	},
+
+	view(vnode) {
+		const view = m('.view', [
+			ViewModel.sha1 !== undefined
+				? m('img', {src: `/image/${ViewModel.sha1}`,
+					width: ViewModel.width, height: ViewModel.height})
+				: "No image.",
+		])
+		return m('.container', {}, [
+			m(Header),
+			m('.body', {}, [view, m(ViewBar)]),
+		])
+	},
+}
+
+// - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
+
+// SimilarModel holds an image's info and its groups of similar images.
+let SimilarModel = {
+	sha1: undefined,
+	info: {paths: []},
+	groups: {},
+
+	// reload fetches similarity data, clearing state first
+	// when the image changed.
+	async reload(sha1) {
+		if (this.sha1 !== sha1) {
+			this.sha1 = sha1
+			this.info = {paths: []}
+			this.groups = {}
+		}
+
+		let resp = await call('similar', {sha1: sha1})
+		this.info = resp.info
+		this.groups = resp.groups
+	},
+}
+
+// SimilarThumbnail renders a thumbnail linking to the image's View page.
+let SimilarThumbnail = {
+	view(vnode) {
+		const info = vnode.attrs.info
+		return m(m.route.Link, {href: `/view/${info.sha1}`},
+			m(Thumbnail, {info}))
+	},
+}
+
+// SimilarGroup renders one named group of similar images, with their
+// pixel-count ratio relative to the input image and their locations.
+let SimilarGroup = {
+	view(vnode) {
+		const images = vnode.attrs.images
+		let result = [
+			m('h2', vnode.attrs.name),
+			images.map(info => m('.row', [
+				m(SimilarThumbnail, {info}),
+				m('ul', [
+					m('li', Math.round(info.pixelsRatio * 100) +
+						"% pixels of input image"),
+					info.paths.map(path =>
+						m('li', m(ViewBarPath, {path}))),
+				]),
+			]))
+		]
+		if (!images.length)
+			result.push("No matches.")
+		return result
+	},
+}
+
+// SimilarList renders the input image followed by all similarity groups.
+let SimilarList = {
+	view(vnode) {
+		if (SimilarModel.sha1 === undefined ||
+			SimilarModel.info.paths.length == 0)
+			return "No image"
+
+		const info = SimilarModel.info
+		return m('.similar', {}, [
+			m('.row', [
+				m(SimilarThumbnail, {info}),
+				m('ul', info.paths.map(path =>
+					m('li', m(ViewBarPath, {path})))),
+			]),
+			Object.entries(SimilarModel.groups).map(([name, images]) =>
+				m(SimilarGroup, {name, images})),
+		])
+	},
+}
+
+// Similar is the /similar route.
+let Similar = {
+	oninit(vnode) {
+		let sha1 = vnode.attrs.key || ""
+		SimilarModel.reload(sha1)
+	},
+
+	view(vnode) {
+		return m('.container', {}, [
+			m(Header),
+			m('.body', {}, m(SimilarList)),
+		])
+	},
+}
+
+// - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
+
+// SearchModel holds search results for the current query:
+// matching images and related tags.
+let SearchModel = {
+	query: undefined,
+	matches: [],
+	related: {},
+
+	// reload runs the search, clearing state first when the query changed.
+	async reload(query) {
+		if (this.query !== query) {
+			this.query = query
+			this.matches = []
+			this.related = {}
+		}
+
+		let resp = await call('search', {query})
+		this.matches = resp.matches
+		this.related = resp.related
+	},
+}
+
+// SearchRelated renders related tags in the sidebar,
+// grouped by namespace and sorted by score.
+let SearchRelated = {
+	view(vnode) {
+		return Object.entries(SearchModel.related)
+			.sort((a, b) => a[0].localeCompare(b[0]))
+			.map(([space, tags]) => [
+				m('h2', space),
+				m('ul.tags', tags
+					.sort((a, b) => (b.score - a.score))
+					.map(({tag, score}) =>
+						m(ScoredTag, {space, tagname: tag, score}))),
+			])
+	},
+}
+
+let SearchView = {
+ // See BrowseView.
+ oncreate(vnode) { vnode.dom.focus() },
+
+ view(vnode) {
+ return m('.browser[tabindex=0]', {
+ // Trying to force the oncreate on path changes.
+ key: SearchModel.path,
+ }, SearchModel.matches
+ .sort((a, b) => b.score - a.score)
+ .map(info => {
+ return m(m.route.Link, {href: `/view/${info.sha1}`},
+ m(Thumbnail, {info, title: info.score}))
+ }))
+ },
+}
+
+// Search is the /search route: related tags in the sidebar,
+// matching thumbnails on the right.
+let Search = {
+	oninit(vnode) {
+		SearchModel.reload(vnode.attrs.key)
+	},
+
+	view(vnode) {
+		return m('.container', {}, [
+			m(Header),
+			m('.body', {}, [
+				m('.sidebar', [
+					m('p', SearchModel.query),
+					m(SearchRelated),
+				]),
+				m(SearchView),
+			]),
+		])
+	},
+}
+
+// - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
+
+// Set up Mithril routing once the page has loaded,
+// defaulting to the root of the Browse view.
+window.addEventListener('load', () => {
+	m.route(document.body, "/browse/", {
+		// The path doesn't need to be escaped, perhaps change that (":key...").
+		"/browse/": Browse,
+		"/browse/:key": Browse,
+		"/tags": Tags,
+		"/tags/:key": Tags,
+		"/duplicates": Duplicates,
+		"/orphans": Orphans,
+
+		"/view/:key": View,
+		"/similar/:key": Similar,
+
+		"/search/:key": Search,
+	})
+})
diff --git a/public/style.css b/public/style.css
new file mode 100644
index 0000000..1bdeb3f
--- /dev/null
+++ b/public/style.css
@@ -0,0 +1,102 @@
+:root { --shade-color: #eee; }
+
+body { margin: 0; padding: 0; font-family: sans-serif; }
+a { color: inherit; }
+
+/* Page skeleton: a full-viewport column with a tab-bar header on top,
+ * and a scrollable body below it. */
+.container { display: flex; flex-direction: column;
+	height: 100vh; width: 100vw; overflow: hidden; }
+
+.body { display: flex; flex-grow: 1; overflow: hidden; position: relative; }
+.body::after { content: ''; position: absolute; pointer-events: none;
+	top: 0; left: 0; right: 0; height: .75rem;
+	background: linear-gradient(#fff, rgb(255 255 255 / 0%)); }
+
+.header { color: #000; background: #aaa linear-gradient(#888, #999);
+	display: flex; justify-content: space-between; column-gap: .5rem; }
+.header nav { display: flex; margin: 0 .5rem; align-items: end; }
+.header nav a { display: block; text-decoration: none;
+	background: #bbb linear-gradient(#bbb, #ccc);
+	margin: .25rem 0 0 -1px; padding: .25rem .75rem;
+	border: 1px solid #888; border-radius: .5rem .5rem 0 0; }
+.header nav a.active { font-weight: bold; border-bottom: 1px solid #fff;
+	background: #fff linear-gradient(#eee, #fff); }
+.header nav a.active, .header nav a:hover { padding-bottom: .4rem; }
+.header .activity { padding: .25rem .5rem; align-self: center; color: #fff; }
+.header .activity.error { color: #f00; }
+
+/* Left sidebar: the Browse directory tree, or Search's related tags. */
+.sidebar { padding: .25rem .5rem; background: var(--shade-color);
+	border-right: 1px solid #ccc; overflow: auto;
+	min-width: 10rem; max-width: 20rem; flex-shrink: 0; }
+.sidebar h2 { margin: 0.5em 0 0.25em 0; padding: 0; font-size: 1.2rem; }
+.sidebar ul { margin: .5rem 0; padding: 0; }
+
+.sidebar .path { margin: .5rem -.5rem; }
+.sidebar .path li { margin: 0; padding: 0; }
+.sidebar .path li a { padding: .25rem .5rem; padding-left: 30px;
+	display: block; text-decoration: none; white-space: nowrap; }
+.sidebar .path li a:hover { background-color: rgb(0 0 0 / 10%); }
+
+/* Inline SVG bullets: up chevron for ancestors, dot for the current
+ * directory, down chevron for subdirectories. */
+.sidebar .path li.parent a {
+	background-image: url("data:image/svg+xml,%3Csvg xmlns='http://www.w3.org/2000/svg' width='20' height='20'%3E%3Cpath d='M 4 14 10 8 16 14' stroke='%23888' stroke-width='4' fill='none' /%3E%3C/svg%3E%0A");
+	background-repeat: no-repeat; background-position: 5px center; }
+
+.sidebar .path li.selected a { font-weight: bold;
+	background-image: url("data:image/svg+xml,%3Csvg xmlns='http://www.w3.org/2000/svg' width='20' height='20'%3E%3Ccircle cx='10' cy='10' r='6' fill='%23888' /%3E%3C/svg%3E%0A");
+	background-repeat: no-repeat; background-position: 5px center; }
+
+.sidebar .path li.child a {
+	background-image: url("data:image/svg+xml,%3Csvg xmlns='http://www.w3.org/2000/svg' width='20' height='20'%3E%3Cpath d='M 4 6 10 12 16 6' stroke='%23888' stroke-width='4' fill='none' /%3E%3C/svg%3E%0A");
+	background-repeat: no-repeat; background-position: 5px center; }
+
+.browser { overflow: auto; display: flex; flex-wrap: wrap;
+	align-content: flex-start; justify-content: center; align-items: center;
+	gap: 3px; padding: 9px; flex-grow: 1; }
+/* Suppress the focus ring from BrowseView's programmatic focusing. */
+.browser:focus-visible { outline: 0; box-shadow: none; }
+
+.tags { padding: .5rem; flex-grow: 1; overflow: auto; }
+.tags:focus-visible { outline: 0; box-shadow: none; }
+.tags h2 { margin: .5em 0 .25em 0; padding: 0; font-size: 1.1rem; }
+.tags p { margin: .25em 0; }
+.tags ul { display: flex; margin: .5em 0; padding: 0;
+	flex-wrap: wrap; gap: .25em; }
+.tags ul li { display: block; margin: 0; padding: .25em .5em;
+	border-radius: .5rem; background: var(--shade-color); }
+
+/* Checkerboard background, so transparent thumbnails remain visible. */
+img.thumbnail { display: block;
+	background: repeating-conic-gradient(#eee 0% 25%, transparent 0% 50%)
+		50% / 20px 20px; }
+img.thumbnail, .thumbnail.missing { box-shadow: 0 0 3px rgba(0, 0, 0, 0.75);
+	margin: 3px; border: 0px solid #000; }
+.thumbnail.missing { width: 128px; height: 128px; position: relative; }
+.thumbnail.missing::after { content: '?'; font-size: 64px;
+	position: absolute; top: 50%; left: 50%; transform: translate(-50%, -50%); }
+
+.view { display: flex; flex-grow: 1; overflow: hidden;
+	justify-content: center; align-items: center; }
+.view img { max-width: 100%; max-height: 100%; object-fit: contain; }
+.view img { z-index: 1; }
+
+.viewbar { padding: .25rem .5rem; background: #eee;
+	border-left: 1px solid #ccc; min-width: 20rem; overflow: auto; }
+.viewbar h2 { margin: 0.5em 0 0.25em 0; padding: 0; font-size: 1.2rem; }
+.viewbar h3 { margin: 0.25em 0; padding: 0; font-size: 1.1rem; }
+.viewbar ul { margin: 0; padding: 0 0 0 1.25em; list-style-type: "- "; }
+.viewbar ul.tags { padding: 0; list-style-type: none; }
+.viewbar li { margin: 0; padding: 0; }
+
+.sidebar meter,
+.viewbar meter { width: 1.25rem;
+	/* background: white; border: 1px solid #ccc; */ }
+
+.similar { padding: .5rem; flex-grow: 1; overflow: auto; }
+.similar h2 { margin: 1em 0 0.5em 0; padding: 0; font-size: 1.2rem; }
+.similar .row { display: flex; margin: .5rem 0; }
+.similar .row ul { margin: 0; padding: 0 0 0 1.25em; list-style-type: "- "; }
+
+.duplicates,
+.orphans { padding: .5rem; flex-grow: 1; overflow: auto; }
+.duplicates .row,
+.orphans .row { display: flex; margin: .5rem 0; align-items: center; gap: 3px; }
+
+.orphans .row { margin-bottom: 1.25rem; }
+.orphans h2 { margin: 0.25em 0; padding: 0; font-size: 1.1rem; }
diff --git a/test.sh b/test.sh
new file mode 100755
index 0000000..2f12d07
--- /dev/null
+++ b/test.sh
@@ -0,0 +1,65 @@
+#!/bin/sh -xe
+cd "$(dirname "$0")"
+make gallery
+target=/tmp/G input=/tmp/G/Test
+rm -rf $target
+
+mkdir -p $target $input/Test $input/Empty
+gen() { magick "$@"; sha1=$(sha1sum "$(eval echo \$\{$#\})" | cut -d' ' -f1); }
+
+gen wizard: $input/wizard.webp
+gen -seed 10 -size 256x256 plasma:fractal \
+ $input/Test/dhash.jpg
+gen -seed 10 -size 256x256 plasma:fractal \
+ $input/Test/dhash.png
+sha1duplicate=$sha1
+cp $input/Test/dhash.png \
+ $input/Test/multiple-paths.png
+
+gen -seed 20 -size 160x128 plasma:fractal \
+ -bordercolor transparent -border 64 \
+ $input/Test/transparent-wide.png
+gen -seed 30 -size 1024x256 plasma:fractal \
+ -alpha set -channel A -evaluate multiply 0.2 \
+ $input/Test/translucent-superwide.png
+
+gen -size 96x96 -delay 10 -loop 0 \
+ -seed 111 plasma:fractal \
+ -seed 222 plasma:fractal \
+ -seed 333 plasma:fractal \
+ -seed 444 plasma:fractal \
+ -seed 555 plasma:fractal \
+ -seed 666 plasma:fractal \
+ $input/Test/animation-small.gif
+sha1animated=$sha1
+gen $input/Test/animation-small.gif \
+ $input/Test/video.mp4
+
+./gallery init $target
+./gallery sync $target $input "$@"
+./gallery thumbnail $target
+./gallery dhash $target
+./gallery tag $target test "Test space" <<-END
+ $sha1duplicate foo 1.0
+ $sha1duplicate bar 0.5
+ $sha1animated foo 0.8
+END
+
+# TODO: Test all the various possible sync transitions.
+mv $input/Test $input/Plasma
+./gallery sync $target $input
+
+./gallery web $target :8080 &
+web=$!
+trap "kill $web; wait $web" EXIT INT TERM
+sleep 0.25
+
+call() (curl http://localhost:8080/api/$1 -X POST --data-binary @-)
+
+# TODO: Verify that things are how we expect them to be.
+echo '{"path":"'"$(basename "$input")"'"}' | call browse
+echo '{}' | call tags
+echo '{}' | call duplicates
+echo '{}' | call orphans
+echo '{"sha1":"'"$sha1duplicate"'"}' | call info
+echo '{"sha1":"'"$sha1duplicate"'"}' | call similar