diff options
author | Přemysl Eric Janouch <p@janouch.name> | 2023-12-08 02:16:04 +0100 |
---|---|---|
committer | Přemysl Eric Janouch <p@janouch.name> | 2023-12-29 15:02:28 +0100 |
commit | 054078908a1e4c7429ea0f5a3a0605addfccc46c (patch) | |
tree | 7a6dd29cd4381bd655fa78f5866f25c552d05072 /main.go | |
download | gallery-054078908a1e4c7429ea0f5a3a0605addfccc46c.tar.gz gallery-054078908a1e4c7429ea0f5a3a0605addfccc46c.tar.xz gallery-054078908a1e4c7429ea0f5a3a0605addfccc46c.zip |
Initial commit
Diffstat (limited to 'main.go')
-rw-r--r-- | main.go | 2497 |
1 files changed, 2497 insertions, 0 deletions
@@ -0,0 +1,2497 @@ +package main + +import ( + "bufio" + "bytes" + "context" + "crypto/sha1" + "database/sql" + "encoding/hex" + "encoding/json" + "errors" + "flag" + "fmt" + "html/template" + "image" + "image/color" + "io" + "io/fs" + "log" + "math" + "math/bits" + "net" + "net/http" + "os" + "os/exec" + "os/signal" + "path/filepath" + "regexp" + "runtime" + "slices" + "sort" + "strconv" + "strings" + "sync" + "sync/atomic" + "syscall" + "time" + + "github.com/mattn/go-sqlite3" + "golang.org/x/image/draw" + "golang.org/x/image/webp" +) + +var ( + db *sql.DB // sqlite database + galleryDirectory string // gallery directory + + // taskSemaphore limits parallel computations. + taskSemaphore semaphore +) + +const ( + nameOfDB = "gallery.db" + nameOfImageRoot = "images" + nameOfThumbRoot = "thumbs" +) + +func hammingDistance(a, b int64) int { + return bits.OnesCount64(uint64(a) ^ uint64(b)) +} + +func init() { + sql.Register("sqlite3_custom", &sqlite3.SQLiteDriver{ + ConnectHook: func(conn *sqlite3.SQLiteConn) error { + return conn.RegisterFunc("hamming", hammingDistance, true /*pure*/) + }, + }) +} + +func openDB(directory string) error { + var err error + db, err = sql.Open("sqlite3_custom", "file:"+filepath.Join(directory, + nameOfDB+"?_foreign_keys=1&_busy_timeout=1000")) + galleryDirectory = directory + return err +} + +func imagePath(sha1 string) string { + return filepath.Join(galleryDirectory, + nameOfImageRoot, sha1[:2], sha1) +} + +func thumbPath(sha1 string) string { + return filepath.Join(galleryDirectory, + nameOfThumbRoot, sha1[:2], sha1+".webp") +} + +func dbCollectStrings(query string, a ...any) ([]string, error) { + rows, err := db.Query(query, a...) 
+ if err != nil { + return nil, err + } + defer rows.Close() + + result := []string{} + for rows.Next() { + var s string + if err := rows.Scan(&s); err != nil { + return nil, err + } + result = append(result, s) + } + if err := rows.Err(); err != nil { + return nil, err + } + return result, nil +} + +// - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - + +func idForDirectoryPath(tx *sql.Tx, path []string, create bool) (int64, error) { + var parent sql.NullInt64 + for _, name := range path { + if err := tx.QueryRow(`SELECT id FROM node + WHERE parent IS ? AND name = ? AND sha1 IS NULL`, + parent, name).Scan(&parent); err == nil { + continue + } else if !errors.Is(err, sql.ErrNoRows) { + return 0, err + } else if !create { + return 0, err + } + + // This fails when trying to override a leaf node. + // That needs special handling. + if result, err := tx.Exec( + `INSERT INTO node(parent, name) VALUES (?, ?)`, + parent, name); err != nil { + return 0, err + } else if id, err := result.LastInsertId(); err != nil { + return 0, err + } else { + parent = sql.NullInt64{Int64: id, Valid: true} + } + } + return parent.Int64, nil +} + +func decodeWebPath(path string) []string { + // Relative paths could be handled differently, + // but right now, they're assumed to start at the root. + result := []string{} + for _, crumb := range strings.Split(path, "/") { + if crumb != "" { + result = append(result, crumb) + } + } + return result +} + +// --- Semaphore --------------------------------------------------------------- + +type semaphore chan struct{} + +func newSemaphore(size int) semaphore { return make(chan struct{}, size) } +func (s semaphore) release() { <-s } + +func (s semaphore) acquire(ctx context.Context) error { + select { + case <-ctx.Done(): + return ctx.Err() + case s <- struct{}{}: + } + + // Give priority to context cancellation. 
+ select { + case <-ctx.Done(): + s.release() + return ctx.Err() + default: + } + return nil +} + +// --- Progress bar ------------------------------------------------------------ + +type progressBar struct { + sync.Mutex + current int + target int +} + +func newProgressBar(target int) *progressBar { + pb := &progressBar{current: 0, target: target} + pb.Update() + return pb +} + +func (pb *progressBar) Stop() { + // The minimum thing that works: just print a newline. + os.Stdout.WriteString("\n") +} + +func (pb *progressBar) Update() { + if pb.target < 0 { + fmt.Printf("\r%d/?", pb.current) + return + } + + var fraction int + if pb.target != 0 { + fraction = int(float32(pb.current) / float32(pb.target) * 100) + } + + target := fmt.Sprintf("%d", pb.target) + fmt.Printf("\r%*d/%s (%2d%%)", len(target), pb.current, target, fraction) +} + +func (pb *progressBar) Step() { + pb.Lock() + defer pb.Unlock() + + pb.current++ + pb.Update() +} + +func (pb *progressBar) Interrupt(callback func()) { + pb.Lock() + defer pb.Unlock() + pb.Stop() + defer pb.Update() + + callback() +} + +// --- Parallelization --------------------------------------------------------- + +type parallelFunc func(item string) (message string, err error) + +// parallelize runs the callback in parallel on a list of strings, +// reporting progress and any non-fatal messages. 
+func parallelize(strings []string, callback parallelFunc) error { + pb := newProgressBar(len(strings)) + defer pb.Stop() + + ctx, cancel := context.WithCancelCause(context.Background()) + wg := sync.WaitGroup{} + for _, item := range strings { + if taskSemaphore.acquire(ctx) != nil { + break + } + + wg.Add(1) + go func(item string) { + defer taskSemaphore.release() + defer wg.Done() + if message, err := callback(item); err != nil { + cancel(err) + } else if message != "" { + pb.Interrupt(func() { log.Printf("%s: %s\n", item, message) }) + } + pb.Step() + }(item) + } + wg.Wait() + if ctx.Err() != nil { + return context.Cause(ctx) + } + return nil +} + +// --- Initialization ---------------------------------------------------------- + +// cmdInit initializes a "gallery directory" that contains gallery.sqlite, +// images, thumbs. +func cmdInit(fs *flag.FlagSet, args []string) error { + if err := fs.Parse(args); err != nil { + return err + } + if fs.NArg() != 1 { + return errWrongUsage + } + if err := openDB(fs.Arg(0)); err != nil { + return err + } + + if _, err := db.Exec(initializeSQL); err != nil { + return err + } + + // XXX: There's technically no reason to keep images as symlinks, + // we might just keep absolute paths in the database as well. 
+ if err := os.MkdirAll( + filepath.Join(galleryDirectory, nameOfImageRoot), 0755); err != nil { + return err + } + if err := os.MkdirAll( + filepath.Join(galleryDirectory, nameOfThumbRoot), 0755); err != nil { + return err + } + return nil +} + +// --- Web --------------------------------------------------------------------- + +var hashRE = regexp.MustCompile(`^/.*?/([0-9a-f]{40})$`) +var staticHandler http.Handler + +var page = template.Must(template.New("/").Parse(`<!DOCTYPE html><html><head> + <title>Gallery</title> + <meta charset="utf-8" /> + <meta name="viewport" content="width=device-width, initial-scale=1"> + <link rel=stylesheet href=style.css> +</head><body> + <noscript>This is a web application, and requires Javascript.</noscript> + <script src=mithril.js></script> + <script src=gallery.js></script> +</body></html>`)) + +func handleRequest(w http.ResponseWriter, r *http.Request) { + if r.URL.Path != "/" { + staticHandler.ServeHTTP(w, r) + return + } + if err := page.Execute(w, nil); err != nil { + log.Println(err) + } +} + +func handleImages(w http.ResponseWriter, r *http.Request) { + if m := hashRE.FindStringSubmatch(r.URL.Path); m == nil { + http.NotFound(w, r) + } else { + http.ServeFile(w, r, imagePath(m[1])) + } +} + +func handleThumbs(w http.ResponseWriter, r *http.Request) { + if m := hashRE.FindStringSubmatch(r.URL.Path); m == nil { + http.NotFound(w, r) + } else { + http.ServeFile(w, r, thumbPath(m[1])) + } +} + +// - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - + +func getSubdirectories(tx *sql.Tx, parent int64) (names []string, err error) { + return dbCollectStrings(`SELECT name FROM node + WHERE IFNULL(parent, 0) = ? 
AND sha1 IS NULL`, parent) +} + +type webEntry struct { + SHA1 string `json:"sha1"` + Name string `json:"name"` + Modified int64 `json:"modified"` + ThumbW int64 `json:"thumbW"` + ThumbH int64 `json:"thumbH"` +} + +func getSubentries(tx *sql.Tx, parent int64) (entries []webEntry, err error) { + rows, err := tx.Query(` + SELECT i.sha1, n.name, n.mtime, IFNULL(i.thumbw, 0), IFNULL(i.thumbh, 0) + FROM node AS n + JOIN image AS i ON n.sha1 = i.sha1 + WHERE n.parent = ?`, parent) + if err != nil { + return nil, err + } + defer rows.Close() + + entries = []webEntry{} + for rows.Next() { + var e webEntry + if err = rows.Scan( + &e.SHA1, &e.Name, &e.Modified, &e.ThumbW, &e.ThumbH); err != nil { + return nil, err + } + entries = append(entries, e) + } + return entries, rows.Err() +} + +func handleAPIBrowse(w http.ResponseWriter, r *http.Request) { + var params struct { + Path string + } + if err := json.NewDecoder(r.Body).Decode(¶ms); err != nil { + http.Error(w, err.Error(), http.StatusBadRequest) + return + } + + var result struct { + Subdirectories []string `json:"subdirectories"` + Entries []webEntry `json:"entries"` + } + + tx, err := db.Begin() + if err != nil { + http.Error(w, err.Error(), http.StatusInternalServerError) + return + } + defer tx.Rollback() + + parent, err := idForDirectoryPath(tx, decodeWebPath(params.Path), false) + if err != nil { + http.Error(w, err.Error(), http.StatusNotFound) + return + } + + result.Subdirectories, err = getSubdirectories(tx, parent) + if err != nil { + http.Error(w, err.Error(), http.StatusInternalServerError) + return + } + result.Entries, err = getSubentries(tx, parent) + if err != nil { + http.Error(w, err.Error(), http.StatusInternalServerError) + return + } + if err := json.NewEncoder(w).Encode(result); err != nil { + log.Println(err) + } +} + +// - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - + +type webTagNamespace struct { + Description string `json:"description"` + Tags map[string]int64 
`json:"tags"` +} + +func getTags(nsID int64) (result map[string]int64, err error) { + rows, err := db.Query(` + SELECT t.name, COUNT(ta.tag) AS count + FROM tag AS t + LEFT JOIN tag_assignment AS ta ON t.id = ta.tag + WHERE t.space = ? + GROUP BY t.id`, nsID) + if err != nil { + return + } + defer rows.Close() + + result = make(map[string]int64) + for rows.Next() { + var ( + name string + count int64 + ) + if err = rows.Scan(&name, &count); err != nil { + return + } + result[name] = count + } + return result, rows.Err() +} + +func getTagNamespaces(match *string) ( + result map[string]webTagNamespace, err error) { + var rows *sql.Rows + if match != nil { + rows, err = db.Query(`SELECT id, name, IFNULL(description, '') + FROM tag_space WHERE name = ?`, *match) + } else { + rows, err = db.Query(`SELECT id, name, IFNULL(description, '') + FROM tag_space`) + } + if err != nil { + return + } + defer rows.Close() + + result = make(map[string]webTagNamespace) + for rows.Next() { + var ( + id int64 + name string + ns webTagNamespace + ) + if err = rows.Scan(&id, &name, &ns.Description); err != nil { + return + } + if ns.Tags, err = getTags(id); err != nil { + return + } + result[name] = ns + } + return result, rows.Err() +} + +func handleAPITags(w http.ResponseWriter, r *http.Request) { + var params struct { + Namespace *string + } + if err := json.NewDecoder(r.Body).Decode(¶ms); err != nil { + http.Error(w, err.Error(), http.StatusBadRequest) + return + } + + result, err := getTagNamespaces(params.Namespace) + if err != nil { + http.Error(w, err.Error(), http.StatusInternalServerError) + return + } + + if err := json.NewEncoder(w).Encode(result); err != nil { + log.Println(err) + } +} + +// - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - + +type webDuplicateImage struct { + SHA1 string `json:"sha1"` + ThumbW int64 `json:"thumbW"` + ThumbH int64 `json:"thumbH"` + Occurences int64 `json:"occurences"` +} + +// A hamming distance of zero (direct 
dhash match) will be more than sufficient. +const duplicatesCTE = `WITH + duplicated(dhash, count) AS ( + SELECT dhash, COUNT(*) AS count FROM image + WHERE dhash IS NOT NULL + GROUP BY dhash HAVING count > 1 + ), + multipathed(sha1, count) AS ( + SELECT n.sha1, COUNT(*) AS count FROM node AS n + JOIN image AS i ON i.sha1 = n.sha1 + WHERE i.dhash IS NULL + OR i.dhash NOT IN (SELECT dhash FROM duplicated) + GROUP BY n.sha1 HAVING count > 1 + ) +` + +func getDuplicatesSimilar(stmt *sql.Stmt, dhash int64) ( + result []webDuplicateImage, err error) { + rows, err := stmt.Query(dhash) + if err != nil { + return nil, err + } + defer rows.Close() + + result = []webDuplicateImage{} + for rows.Next() { + var image webDuplicateImage + if err = rows.Scan(&image.SHA1, &image.ThumbW, &image.ThumbH, + &image.Occurences); err != nil { + return nil, err + } + result = append(result, image) + } + return result, rows.Err() +} + +func getDuplicates1(result [][]webDuplicateImage) ( + [][]webDuplicateImage, error) { + stmt, err := db.Prepare(` + SELECT i.sha1, IFNULL(i.thumbw, 0), IFNULL(i.thumbh, 0), + COUNT(*) AS occurences + FROM image AS i + JOIN node AS n ON n.sha1 = i.sha1 + WHERE i.dhash = ? + GROUP BY n.sha1`) + if err != nil { + return nil, err + } + defer stmt.Close() + + rows, err := db.Query(duplicatesCTE + `SELECT dhash FROM duplicated`) + if err != nil { + return nil, err + } + defer rows.Close() + + for rows.Next() { + var ( + group []webDuplicateImage + dhash int64 + ) + if err = rows.Scan(&dhash); err != nil { + return nil, err + } + if group, err = getDuplicatesSimilar(stmt, dhash); err != nil { + return nil, err + } + result = append(result, group) + } + return result, rows.Err() +} + +func getDuplicates2(result [][]webDuplicateImage) ( + [][]webDuplicateImage, error) { + stmt, err := db.Prepare(` + SELECT i.sha1, IFNULL(i.thumbw, 0), IFNULL(i.thumbh, 0), + COUNT(*) AS occurences + FROM image AS i + JOIN node AS n ON n.sha1 = i.sha1 + WHERE i.sha1 = ? 
+ GROUP BY n.sha1`) + if err != nil { + return nil, err + } + defer stmt.Close() + + rows, err := db.Query(duplicatesCTE + `SELECT sha1 FROM multipathed`) + if err != nil { + return nil, err + } + defer rows.Close() + + for rows.Next() { + var ( + image webDuplicateImage + sha1 string + ) + if err = rows.Scan(&sha1); err != nil { + return nil, err + } + if err := stmt.QueryRow(sha1).Scan(&image.SHA1, + &image.ThumbW, &image.ThumbH, &image.Occurences); err != nil { + return nil, err + } + result = append(result, []webDuplicateImage{image}) + } + return result, rows.Err() +} + +func handleAPIDuplicates(w http.ResponseWriter, r *http.Request) { + var params struct{} + if err := json.NewDecoder(r.Body).Decode(¶ms); err != nil { + http.Error(w, err.Error(), http.StatusBadRequest) + return + } + + var ( + result = [][]webDuplicateImage{} + err error + ) + if result, err = getDuplicates1(result); err != nil { + http.Error(w, err.Error(), http.StatusInternalServerError) + return + } + if result, err = getDuplicates2(result); err != nil { + http.Error(w, err.Error(), http.StatusInternalServerError) + return + } + + if err := json.NewEncoder(w).Encode(result); err != nil { + log.Println(err) + } +} + +// - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - + +type webOrphanImage struct { + SHA1 string `json:"sha1"` + ThumbW int64 `json:"thumbW"` + ThumbH int64 `json:"thumbH"` + Tags int64 `json:"tags"` +} + +type webOrphan struct { + webOrphanImage + LastPath string `json:"lastPath"` + Replacement *webOrphanImage `json:"replacement"` +} + +func getOrphanReplacement(webPath string) (*webOrphanImage, error) { + tx, err := db.Begin() + if err != nil { + return nil, err + } + defer tx.Rollback() + + path := decodeWebPath(webPath) + if len(path) == 0 { + return nil, nil + } + + parent, err := idForDirectoryPath(tx, path[:len(path)-1], false) + if err != nil { + return nil, err + } + + var image webOrphanImage + err = db.QueryRow(`SELECT i.sha1, + 
IFNULL(i.thumbw, 0), IFNULL(i.thumbh, 0), COUNT(ta.sha1) AS tags + FROM node AS n + JOIN image AS i ON n.sha1 = i.sha1 + LEFT JOIN tag_assignment AS ta ON n.sha1 = ta.sha1 + WHERE n.parent = ? AND n.name = ? + GROUP BY n.sha1`, parent, path[len(path)-1]).Scan( + &image.SHA1, &image.ThumbW, &image.ThumbH, &image.Tags) + if errors.Is(err, sql.ErrNoRows) { + return nil, nil + } else if err != nil { + return nil, err + } + return &image, nil +} + +func getOrphans() (result []webOrphan, err error) { + rows, err := db.Query(`SELECT o.sha1, o.path, + IFNULL(i.thumbw, 0), IFNULL(i.thumbh, 0), COUNT(ta.sha1) AS tags + FROM orphan AS o + JOIN image AS i ON o.sha1 = i.sha1 + LEFT JOIN tag_assignment AS ta ON o.sha1 = ta.sha1 + GROUP BY o.sha1`) + if err != nil { + return nil, err + } + defer rows.Close() + + result = []webOrphan{} + for rows.Next() { + var orphan webOrphan + if err = rows.Scan(&orphan.SHA1, &orphan.LastPath, + &orphan.ThumbW, &orphan.ThumbH, &orphan.Tags); err != nil { + return nil, err + } + + orphan.Replacement, err = getOrphanReplacement(orphan.LastPath) + if err != nil { + return nil, err + } + + result = append(result, orphan) + } + return result, rows.Err() +} + +func handleAPIOrphans(w http.ResponseWriter, r *http.Request) { + var params struct{} + if err := json.NewDecoder(r.Body).Decode(¶ms); err != nil { + http.Error(w, err.Error(), http.StatusBadRequest) + return + } + + result, err := getOrphans() + if err != nil { + http.Error(w, err.Error(), http.StatusInternalServerError) + return + } + + if err := json.NewEncoder(w).Encode(result); err != nil { + log.Println(err) + } +} + +// - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - + +func getImageDimensions(sha1 string) (w int64, h int64, err error) { + err = db.QueryRow(`SELECT width, height FROM image WHERE sha1 = ?`, + sha1).Scan(&w, &h) + return +} + +func getImagePaths(sha1 string) (paths []string, err error) { + rows, err := db.Query(`WITH RECURSIVE paths(parent, 
path) AS ( + SELECT parent, name AS path FROM node WHERE sha1 = ? + UNION ALL + SELECT n.parent, n.name || '/' || p.path + FROM node AS n JOIN paths AS p ON n.id = p.parent + ) SELECT path FROM paths WHERE parent IS NULL`, sha1) + if err != nil { + return nil, err + } + defer rows.Close() + + paths = []string{} + for rows.Next() { + var path string + if err := rows.Scan(&path); err != nil { + return nil, err + } + paths = append(paths, path) + } + return paths, rows.Err() +} + +func getImageTags(sha1 string) (map[string]map[string]float32, error) { + rows, err := db.Query(` + SELECT ts.name, t.name, ta.weight FROM tag_assignment AS ta + JOIN tag AS t ON t.id = ta.tag + JOIN tag_space AS ts ON ts.id = t.space + WHERE ta.sha1 = ?`, sha1) + if err != nil { + return nil, err + } + defer rows.Close() + + result := make(map[string]map[string]float32) + for rows.Next() { + var ( + space, tag string + weight float32 + ) + if err := rows.Scan(&space, &tag, &weight); err != nil { + return nil, err + } + + tags := result[space] + if tags == nil { + tags = make(map[string]float32) + result[space] = tags + } + tags[tag] = weight + } + return result, rows.Err() +} + +func handleAPIInfo(w http.ResponseWriter, r *http.Request) { + var params struct { + SHA1 string + } + if err := json.NewDecoder(r.Body).Decode(¶ms); err != nil { + http.Error(w, err.Error(), http.StatusBadRequest) + return + } + + var result struct { + Width int64 `json:"width"` + Height int64 `json:"height"` + Paths []string `json:"paths"` + Tags map[string]map[string]float32 `json:"tags"` + } + + var err error + result.Width, result.Height, err = getImageDimensions(params.SHA1) + if errors.Is(err, sql.ErrNoRows) { + http.Error(w, err.Error(), http.StatusNotFound) + return + } else if err != nil { + http.Error(w, err.Error(), http.StatusInternalServerError) + return + } + + result.Paths, err = getImagePaths(params.SHA1) + if err != nil { + http.Error(w, err.Error(), http.StatusInternalServerError) + return + } + 
result.Tags, err = getImageTags(params.SHA1) + if err != nil { + http.Error(w, err.Error(), http.StatusInternalServerError) + return + } + if err := json.NewEncoder(w).Encode(result); err != nil { + log.Println(err) + } +} + +// - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - + +type webSimilarImage struct { + SHA1 string `json:"sha1"` + PixelsRatio float32 `json:"pixelsRatio"` + ThumbW int64 `json:"thumbW"` + ThumbH int64 `json:"thumbH"` + Paths []string `json:"paths"` +} + +func getSimilar(sha1 string, dhash int64, pixels int64, distance int) ( + result []webSimilarImage, err error) { + // For distance ∈ {0, 1}, this query is quite inefficient. + // In exchange, it's generic. + // + // If there's a dhash, there should also be thumbnail dimensions, + // so not bothering with IFNULL on them. + rows, err := db.Query(` + SELECT sha1, width * height, IFNULL(thumbw, 0), IFNULL(thumbh, 0) + FROM image WHERE sha1 <> ? AND dhash IS NOT NULL + AND hamming(dhash, ?) = ?`, sha1, dhash, distance) + if err != nil { + return nil, err + } + defer rows.Close() + + result = []webSimilarImage{} + for rows.Next() { + var ( + match webSimilarImage + matchPixels int64 + ) + if err = rows.Scan(&match.SHA1, + &matchPixels, &match.ThumbW, &match.ThumbH); err != nil { + return nil, err + } + if match.Paths, err = getImagePaths(match.SHA1); err != nil { + return nil, err + } + match.PixelsRatio = float32(matchPixels) / float32(pixels) + result = append(result, match) + } + return result, rows.Err() +} + +func getSimilarGroups(sha1 string, dhash int64, pixels int64, + output map[string][]webSimilarImage) error { + var err error + for distance := 0; distance <= 1; distance++ { + output[fmt.Sprintf("Perceptual distance %d", distance)], err = + getSimilar(sha1, dhash, pixels, distance) + if err != nil { + return err + } + } + return nil +} + +func handleAPISimilar(w http.ResponseWriter, r *http.Request) { + var params struct { + SHA1 string + } + if err := 
json.NewDecoder(r.Body).Decode(¶ms); err != nil { + http.Error(w, err.Error(), http.StatusBadRequest) + return + } + + var result struct { + Info webSimilarImage `json:"info"` + Groups map[string][]webSimilarImage `json:"groups"` + } + + result.Info = webSimilarImage{SHA1: params.SHA1, PixelsRatio: 1} + if paths, err := getImagePaths(params.SHA1); err != nil { + http.Error(w, err.Error(), http.StatusInternalServerError) + return + } else { + result.Info.Paths = paths + } + + var ( + width, height int64 + dhash sql.NullInt64 + ) + err := db.QueryRow(` + SELECT width, height, dhash, IFNULL(thumbw, 0), IFNULL(thumbh, 0) + FROM image WHERE sha1 = ?`, params.SHA1).Scan(&width, &height, &dhash, + &result.Info.ThumbW, &result.Info.ThumbH) + if errors.Is(err, sql.ErrNoRows) { + http.Error(w, err.Error(), http.StatusNotFound) + return + } else if err != nil { + http.Error(w, err.Error(), http.StatusInternalServerError) + return + } + + result.Groups = make(map[string][]webSimilarImage) + if dhash.Valid { + if err := getSimilarGroups( + params.SHA1, dhash.Int64, width*height, result.Groups); err != nil { + http.Error(w, err.Error(), http.StatusInternalServerError) + return + } + } + + if err := json.NewEncoder(w).Encode(result); err != nil { + log.Println(err) + } +} + +// - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - + +// NOTE: AND will mean MULTIPLY(IFNULL(ta.weight, 0)) per SHA1. +const searchCTE = `WITH + matches(sha1, thumbw, thumbh, score) AS ( + SELECT i.sha1, i.thumbw, i.thumbh, ta.weight AS score + FROM tag_assignment AS ta + JOIN image AS i ON i.sha1 = ta.sha1 + WHERE ta.tag = ? + ), + supertags(tag) AS ( + SELECT DISTINCT ta.tag + FROM tag_assignment AS ta + JOIN matches AS m ON m.sha1 = ta.sha1 + ), + scoredtags(tag, score) AS ( + -- The cross join is a deliberate optimization, + -- and this query may still be really slow. 
+ SELECT st.tag, AVG(IFNULL(ta.weight, 0)) AS score + FROM matches AS m + CROSS JOIN supertags AS st + LEFT JOIN tag_assignment AS ta + ON ta.sha1 = m.sha1 AND ta.tag = st.tag + GROUP BY st.tag + -- Using the column alias doesn't fail, but it also doesn't work. + HAVING AVG(IFNULL(ta.weight, 0)) >= 0.01 + ) +` + +type webTagMatch struct { + SHA1 string `json:"sha1"` + ThumbW int64 `json:"thumbW"` + ThumbH int64 `json:"thumbH"` + Score float32 `json:"score"` +} + +func getTagMatches(tag int64) (matches []webTagMatch, err error) { + rows, err := db.Query(searchCTE+` + SELECT sha1, IFNULL(thumbw, 0), IFNULL(thumbh, 0), score + FROM matches`, tag) + if err != nil { + return nil, err + } + defer rows.Close() + + matches = []webTagMatch{} + for rows.Next() { + var match webTagMatch + if err = rows.Scan(&match.SHA1, + &match.ThumbW, &match.ThumbH, &match.Score); err != nil { + return nil, err + } + matches = append(matches, match) + } + return matches, rows.Err() +} + +type webTagRelated struct { + Tag string `json:"tag"` + Score float32 `json:"score"` +} + +func getTagRelated(tag int64) (result map[string][]webTagRelated, err error) { + rows, err := db.Query(searchCTE+` + SELECT ts.name, t.name, st.score FROM scoredtags AS st + JOIN tag AS t ON st.tag = t.id + JOIN tag_space AS ts ON ts.id = t.space + ORDER BY st.score DESC`, tag) + if err != nil { + return nil, err + } + defer rows.Close() + + result = make(map[string][]webTagRelated) + for rows.Next() { + var ( + space string + r webTagRelated + ) + if err = rows.Scan(&space, &r.Tag, &r.Score); err != nil { + return nil, err + } + result[space] = append(result[space], r) + } + return result, rows.Err() +} + +func handleAPISearch(w http.ResponseWriter, r *http.Request) { + var params struct { + Query string + } + if err := json.NewDecoder(r.Body).Decode(¶ms); err != nil { + http.Error(w, err.Error(), http.StatusBadRequest) + return + } + + var result struct { + Matches []webTagMatch `json:"matches"` + Related 
map[string][]webTagRelated `json:"related"` + } + + space, tag, _ := strings.Cut(params.Query, ":") + + var tagID int64 + err := db.QueryRow(` + SELECT t.id FROM tag AS t + JOIN tag_space AS ts ON t.space = ts.id + WHERE ts.name = ? AND t.name = ?`, space, tag).Scan(&tagID) + if errors.Is(err, sql.ErrNoRows) { + http.Error(w, err.Error(), http.StatusNotFound) + return + } else if err != nil { + http.Error(w, err.Error(), http.StatusInternalServerError) + return + } + + if result.Matches, err = getTagMatches(tagID); err != nil { + http.Error(w, err.Error(), http.StatusInternalServerError) + return + } + if result.Related, err = getTagRelated(tagID); err != nil { + http.Error(w, err.Error(), http.StatusInternalServerError) + return + } + + if err := json.NewEncoder(w).Encode(result); err != nil { + log.Println(err) + } +} + +// - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - + +// cmdWeb runs a web UI against GD on ADDRESS. +func cmdWeb(fs *flag.FlagSet, args []string) error { + if err := fs.Parse(args); err != nil { + return err + } + if fs.NArg() != 2 { + return errWrongUsage + } + if err := openDB(fs.Arg(0)); err != nil { + return err + } + + address := fs.Arg(1) + + // This separation is not strictly necessary, + // but having an elementary level of security doesn't hurt either. 
+ staticHandler = http.FileServer(http.Dir("public")) + + http.HandleFunc("/", handleRequest) + http.HandleFunc("/image/", handleImages) + http.HandleFunc("/thumb/", handleThumbs) + http.HandleFunc("/api/browse", handleAPIBrowse) + http.HandleFunc("/api/tags", handleAPITags) + http.HandleFunc("/api/duplicates", handleAPIDuplicates) + http.HandleFunc("/api/orphans", handleAPIOrphans) + http.HandleFunc("/api/info", handleAPIInfo) + http.HandleFunc("/api/similar", handleAPISimilar) + http.HandleFunc("/api/search", handleAPISearch) + + host, port, err := net.SplitHostPort(address) + if err != nil { + log.Println(err) + } else if host == "" { + log.Println("http://" + net.JoinHostPort("localhost", port)) + } else { + log.Println("http://" + address) + } + + s := &http.Server{ + Addr: address, + ReadTimeout: 60 * time.Second, + WriteTimeout: 60 * time.Second, + MaxHeaderBytes: 32 << 10, + } + return s.ListenAndServe() +} + +// --- Sync -------------------------------------------------------------------- + +type syncFileInfo struct { + dbID int64 // DB node ID, or zero if there was none + dbParent int64 // where the file was to be stored + dbName string // the name under which it was to be stored + fsPath string // symlink target + fsMtime int64 // last modified Unix timestamp, used a bit like an ID + + err error // any processing error + sha1 string // raw content hash, empty to skip file + width int // image width in pixels + height int // image height in pixels +} + +type syncContext struct { + ctx context.Context + tx *sql.Tx + info chan syncFileInfo + pb *progressBar + + stmtOrphan *sql.Stmt + stmtDisposeSub *sql.Stmt + stmtDisposeAll *sql.Stmt + + // linked tracks which image hashes we've checked so far in the run. + linked map[string]struct{} +} + +func syncPrintf(c *syncContext, format string, v ...any) { + c.pb.Interrupt(func() { log.Printf(format+"\n", v...) 
}) +} + +// - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - + +type syncNode struct { + dbID int64 + dbName string + dbMtime int64 + dbSHA1 string +} + +func (n *syncNode) dbIsDir() bool { return n.dbSHA1 == "" } + +type syncFile struct { + fsName string + fsMtime int64 + fsIsDir bool +} + +type syncPair struct { + db *syncNode + fs *syncFile +} + +// syncGetNodes returns direct children of a DB node, ordered by name. +// SQLite, like Go, compares strings byte-wise by default. +func syncGetNodes(tx *sql.Tx, dbParent int64) (nodes []syncNode, err error) { + // This works even for the root, which doesn't exist as a DB node. + rows, err := tx.Query(`SELECT id, name, IFNULL(mtime, 0), IFNULL(sha1, '') + FROM node WHERE IFNULL(parent, 0) = ? ORDER BY name`, dbParent) + if err != nil { + return + } + defer rows.Close() + + for rows.Next() { + var node syncNode + if err = rows.Scan(&node.dbID, + &node.dbName, &node.dbMtime, &node.dbSHA1); err != nil { + return + } + nodes = append(nodes, node) + } + return nodes, rows.Err() +} + +// syncGetFiles returns direct children of a FS directory, ordered by name. 
func syncGetFiles(fsPath string) (files []syncFile, err error) {
	dir, err := os.Open(fsPath)
	if err != nil {
		return
	}
	defer dir.Close()

	// Read all entries at once (count 0 = no limit).
	entries, err := dir.ReadDir(0)
	if err != nil {
		return
	}

	for _, entry := range entries {
		info, err := entry.Info()
		if err != nil {
			return files, err
		}

		files = append(files, syncFile{
			fsName: entry.Name(),
			fsMtime: info.ModTime().Unix(),
			fsIsDir: entry.IsDir(),
		})
	}
	// Sort by name to enable the linear merge against syncGetNodes() output.
	sort.Slice(files,
		func(a, b int) bool { return files[a].fsName < files[b].fsName })
	return
}

// - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -

// syncIsImage asks xdg-mime whether the file's MIME type is image/*.
func syncIsImage(path string) (bool, error) {
	out, err := exec.Command("xdg-mime", "query", "filetype", path).Output()
	if err != nil {
		return false, err
	}

	return bytes.HasPrefix(out, []byte("image/")), nil
}

// syncPingImage returns the pixel dimensions of the first frame of the image,
// as reported by ImageMagick, without decoding the whole file.
func syncPingImage(path string) (int, int, error) {
	out, err := exec.Command("magick", "identify", "-limit", "thread", "1",
		"-ping", "-format", "%w %h", path+"[0]").Output()
	if err != nil {
		return 0, 0, err
	}

	var w, h int
	_, err = fmt.Fscanf(bytes.NewReader(out), "%d %d", &w, &h)
	return w, h, err
}

// syncProcess fills in the width, height, and content SHA-1 of a filesystem
// file. Files that are not images are skipped, with info.sha1 left empty.
func syncProcess(c *syncContext, info *syncFileInfo) error {
	// Skip videos, which ImageMagick can process, but we don't want it to,
	// so that they're not converted 1:1 to WebP.
	pathIsImage, err := syncIsImage(info.fsPath)
	if err != nil {
		return err
	}
	if !pathIsImage {
		return nil
	}

	info.width, info.height, err = syncPingImage(info.fsPath)
	if err != nil {
		return err
	}

	f, err := os.Open(info.fsPath)
	if err != nil {
		return err
	}
	defer f.Close()

	// We could make this at least somewhat interruptible by c.ctx,
	// though it would still work poorly.
	hash := sha1.New()
	_, err = io.CopyBuffer(hash, f, make([]byte, 65536))
	if err != nil {
		return err
	}

	info.sha1 = hex.EncodeToString(hash.Sum(nil))
	return nil
}

// syncEnqueue runs file scanning, which can be CPU and I/O expensive,
// in parallel. The goroutine only touches the filesystem, read-only.
func syncEnqueue(c *syncContext, info syncFileInfo) error {
	if err := taskSemaphore.acquire(c.ctx); err != nil {
		return err
	}

	go func(info syncFileInfo) {
		defer taskSemaphore.release()
		info.err = syncProcess(c, &info)
		c.info <- info
	}(info)
	return nil
}

// syncDequeue flushes the result queue of finished asynchronous tasks.
func syncDequeue(c *syncContext) error {
	for {
		select {
		case <-c.ctx.Done():
			return c.ctx.Err()
		case info := <-c.info:
			if err := syncPostProcess(c, info); err != nil {
				return err
			}
		default:
			return nil
		}
	}
}

// syncDispose creates orphan records for the entire subtree given by nodeID
// as appropriate, then deletes all nodes within the subtree. The subtree root
// node is not deleted if "keepNode" is true.
//
// Orphans keep their thumbnail files, as evidence.
func syncDispose(c *syncContext, nodeID int64, keepNode bool) error {
	if _, err := c.stmtOrphan.Exec(nodeID); err != nil {
		return err
	}

	if keepNode {
		if _, err := c.stmtDisposeSub.Exec(nodeID); err != nil {
			return err
		}
	} else {
		if _, err := c.stmtDisposeAll.Exec(nodeID); err != nil {
			return err
		}
	}
	return nil
}

// syncImageResave forcibly (re)creates a symlink at path pointing at target,
// removing whatever currently occupies that path. The loop handles races
// with concurrent creators of the same path.
func syncImageResave(c *syncContext, path string, target string) error {
	dirname, _ := filepath.Split(path)
	if err := os.MkdirAll(dirname, 0755); err != nil {
		return err
	}

	for {
		// Try to remove anything standing in the way.
		err := os.Remove(path)
		if err != nil && !errors.Is(err, os.ErrNotExist) {
			return err
		}

		// TODO: Make it possible to copy or reflink (ioctl FICLONE).
		err = os.Symlink(target, path)
		if err == nil || !errors.Is(err, fs.ErrExist) {
			return err
		}
	}
}

// syncImageSave makes sure imagePath(sha1) resolves to target,
// de-duplicating work through the c.linked cache of already-processed hashes.
func syncImageSave(c *syncContext, sha1 string, target string) error {
	if _, ok := c.linked[sha1]; ok {
		return nil
	}

	ok, path := false, imagePath(sha1)
	if link, err := os.Readlink(path); err == nil {
		ok = link == target
	} else {
		// If it exists, but it is not a symlink, let it be.
		// Even though it may not be a regular file.
		// (EINVAL is what readlink(2) gives for a non-symlink.)
		ok = errors.Is(err, syscall.EINVAL)
	}

	if !ok {
		if err := syncImageResave(c, path, target); err != nil {
			return err
		}
	}

	c.linked[sha1] = struct{}{}
	return nil
}

// syncImage upserts the image row for the scanned file,
// and links the image into the gallery's image tree.
func syncImage(c *syncContext, info syncFileInfo) error {
	if _, err := c.tx.Exec(`INSERT INTO image(sha1, width, height)
		VALUES (?, ?, ?) ON CONFLICT(sha1) DO NOTHING`,
		info.sha1, info.width, info.height); err != nil {
		return err
	}

	return syncImageSave(c, info.sha1, info.fsPath)
}

// syncPostProcess applies the result of one finished scanning task to the
// database. The "X → Y" comments denote state transitions, where 0 is absent,
// D is a directory, and F is a file.
func syncPostProcess(c *syncContext, info syncFileInfo) error {
	defer c.pb.Step()

	// TODO: When replacing an image node (whether it has or doesn't have
	// other links to keep it alive), we could offer copying all tags,
	// though this needs another table to track it.
	// (If it's equivalent enough, the dhash will stay the same,
	// so user can resolve this through the duplicates feature.)
	switch {
	case info.err != nil:
		// * → error
		if ee, ok := info.err.(*exec.ExitError); ok {
			syncPrintf(c, "%s: %s", info.fsPath, ee.Stderr)
		} else {
			return info.err
		}
		fallthrough

	case info.sha1 == "":
		// 0 → 0
		if info.dbID == 0 {
			return nil
		}

		// D → 0, F → 0
		// TODO: Make it possible to disable removal (for copying only?)
		return syncDispose(c, info.dbID, false /*keepNode*/)

	case info.dbID == 0:
		// 0 → F
		if err := syncImage(c, info); err != nil {
			return err
		}
		if _, err := c.tx.Exec(`INSERT INTO node(parent, name, mtime, sha1)
			VALUES (?, ?, ?, ?)`,
			info.dbParent, info.dbName, info.fsMtime, info.sha1); err != nil {
			return err
		}
		return nil

	default:
		// D → F, F → F (this statement is a no-op with the latter)
		if err := syncDispose(c, info.dbID, true /*keepNode*/); err != nil {
			return err
		}

		// Even if the hash didn't change, see comment in syncDirectoryPair().
		if err := syncImage(c, info); err != nil {
			return err
		}
		if _, err := c.tx.Exec(`UPDATE node SET mtime = ?, sha1 = ?
			WHERE id = ?`, info.fsMtime, info.sha1, info.dbID); err != nil {
			return err
		}
		return nil
	}
}

// syncDirectoryPair processes a single name-matched DB/FS pairing within one
// directory, either resolving it synchronously, or enqueuing a scanning task.
func syncDirectoryPair(c *syncContext, dbParent int64, fsPath string,
	pair syncPair) error {
	db, fs, fsInfo := pair.db, pair.fs, syncFileInfo{dbParent: dbParent}
	if db != nil {
		fsInfo.dbID = db.dbID
	}
	if fs != nil {
		fsInfo.dbName = fs.fsName
		fsInfo.fsPath = filepath.Join(fsPath, fs.fsName)
		fsInfo.fsMtime = fs.fsMtime
	}

	switch {
	case db == nil && fs == nil:
		// 0 → 0, unreachable.

	case db == nil && fs.fsIsDir:
		// 0 → D
		var id int64
		if result, err := c.tx.Exec(`INSERT INTO node(parent, name)
			VALUES (?, ?)`, dbParent, fs.fsName); err != nil {
			return err
		} else if id, err = result.LastInsertId(); err != nil {
			return err
		}
		return syncDirectory(c, id, fsInfo.fsPath)

	case db == nil:
		// 0 → F (or 0 → 0)
		return syncEnqueue(c, fsInfo)

	case fs == nil:
		// D → 0, F → 0
		// TODO: Make it possible to disable removal (for copying only?)
		return syncDispose(c, db.dbID, false /*keepNode*/)

	case db.dbIsDir() && fs.fsIsDir:
		// D → D
		return syncDirectory(c, db.dbID, fsInfo.fsPath)

	case db.dbIsDir():
		// D → F (or D → 0)
		return syncEnqueue(c, fsInfo)

	case fs.fsIsDir:
		// F → D
		if err := syncDispose(c, db.dbID, true /*keepNode*/); err != nil {
			return err
		}
		if _, err := c.tx.Exec(`UPDATE node
			SET mtime = NULL, sha1 = NULL WHERE id = ?`, db.dbID); err != nil {
			return err
		}
		return syncDirectory(c, db.dbID, fsInfo.fsPath)

	case db.dbMtime != fs.fsMtime:
		// F → F (or F → 0)
		// Assuming that any content modifications change the timestamp.
		return syncEnqueue(c, fsInfo)

	default:
		// F → F
		// Try to fix symlinks, to handle the following situations:
		// 1. Image A occurs in paths 1 and 2, we use a symlink to path 1,
		//    and path 1 is removed from the filesystem:
		//    path 2 would not resolve if the mtime didn't change.
		// 2. Image A occurs in paths 1 and 2, we use a symlink to path 1,
		//    and path 1 is changed:
		//    path 2 would resolve to the wrong file.
		// This may relink images with multiple occurences unnecessarily,
		// but it will always fix the roots that are being synced.
		if err := syncImageSave(c, db.dbSHA1, fsInfo.fsPath); err != nil {
			return err
		}
	}
	return nil
}

// syncDirectory walks one directory level, pairing DB nodes with FS entries
// by name (both listings are sorted, so this is a linear merge), and
// recursing into subdirectories.
func syncDirectory(c *syncContext, dbParent int64, fsPath string) error {
	db, err := syncGetNodes(c.tx, dbParent)
	if err != nil {
		return err
	}

	fs, err := syncGetFiles(fsPath)
	if err != nil {
		return err
	}

	// This would not be fatal, but it has annoying consequences.
	if _, ok := slices.BinarySearchFunc(fs, syncFile{fsName: nameOfDB},
		func(a, b syncFile) int {
			return strings.Compare(a.fsName, b.fsName)
		}); ok {
		syncPrintf(c, "%s may be a gallery directory, treating as empty",
			fsPath)
		fs = nil
	}

	// Convert differences to a form more convenient for processing.
	iDB, iFS, pairs := 0, 0, []syncPair{}
	for iDB < len(db) && iFS < len(fs) {
		if db[iDB].dbName == fs[iFS].fsName {
			pairs = append(pairs, syncPair{&db[iDB], &fs[iFS]})
			iDB++
			iFS++
		} else if db[iDB].dbName < fs[iFS].fsName {
			pairs = append(pairs, syncPair{&db[iDB], nil})
			iDB++
		} else {
			pairs = append(pairs, syncPair{nil, &fs[iFS]})
			iFS++
		}
	}
	for i := range db[iDB:] {
		pairs = append(pairs, syncPair{&db[iDB+i], nil})
	}
	for i := range fs[iFS:] {
		pairs = append(pairs, syncPair{nil, &fs[iFS+i]})
	}

	for _, pair := range pairs {
		if err := syncDequeue(c); err != nil {
			return err
		}
		if err := syncDirectoryPair(c, dbParent, fsPath, pair); err != nil {
			return err
		}
	}
	return nil
}

// syncRoot synchronizes one filesystem root into the database subtree named
// by dbPath, then drains all asynchronous tasks and prunes empty directories.
func syncRoot(c *syncContext, dbPath []string, fsPath string) error {
	// TODO: Support synchronizing individual files.
	// This can only be treated as 0 → F, F → F, or D → F, that is,
	// a variation on current syncEnqueue(), but dbParent must be nullable.

	// Figure out a database root (not trying to convert F → D on conflict,
	// also because we don't know yet if the argument is a directory).
	//
	// Synchronizing F → D or * → F are special cases not worth implementing.
	dbParent, err := idForDirectoryPath(c.tx, dbPath, true)
	if err != nil {
		return err
	}
	if err := syncDirectory(c, dbParent, fsPath); err != nil {
		return err
	}

	// Wait for all tasks to finish, and process the results of their work.
	for i := 0; i < cap(taskSemaphore); i++ {
		if err := taskSemaphore.acquire(c.ctx); err != nil {
			return err
		}
	}
	if err := syncDequeue(c); err != nil {
		return err
	}

	// This is not our semaphore, so prepare it for the next user.
	for i := 0; i < cap(taskSemaphore); i++ {
		taskSemaphore.release()
	}

	// Delete empty directories, from the bottom of the tree up to,
	// but not including, the inserted root.
	//
	// We need to do this at the end due to our recursive handling,
	// as well as because of asynchronous file filtering.
	stmt, err := c.tx.Prepare(`
		WITH RECURSIVE subtree(id, parent, sha1, level) AS (
			SELECT id, parent, sha1, 1 FROM node WHERE id = ?
			UNION ALL
			SELECT n.id, n.parent, n.sha1, s.level + 1
			FROM node AS n JOIN subtree AS s ON n.parent = s.id
		) DELETE FROM node WHERE id IN (
			SELECT id FROM subtree WHERE level <> 1 AND sha1 IS NULL
			AND id NOT IN (SELECT parent FROM node WHERE parent IS NOT NULL)
		)`)
	if err != nil {
		return err
	}

	// Leaf directories can only be deleted one level per pass,
	// so repeat until no rows are affected.
	for {
		if result, err := stmt.Exec(dbParent); err != nil {
			return err
		} else if n, err := result.RowsAffected(); err != nil {
			return err
		} else if n == 0 {
			return nil
		}
	}
}

type syncPath struct {
	db []string // database path, in terms of nodes
	fs string   // normalized filesystem path
}

// syncResolveRoots normalizes filesystem paths given in command line arguments,
// and figures out a database path for each. Duplicates are skipped or rejected.
func syncResolveRoots(args []string, fullpaths bool) (
	roots []*syncPath, err error) {
	for i := range args {
		fs, err := filepath.Abs(filepath.Clean(args[i]))
		if err != nil {
			return nil, err
		}

		roots = append(roots,
			&syncPath{decodeWebPath(filepath.ToSlash(fs)), fs})
	}

	if fullpaths {
		// Filter out duplicates. In this case, they're just duplicated work.
		// Sorting makes any nested path adjacent to its ancestor,
		// so CompactFunc can drop it.
		slices.SortFunc(roots, func(a, b *syncPath) int {
			return strings.Compare(a.fs, b.fs)
		})
		roots = slices.CompactFunc(roots, func(a, b *syncPath) bool {
			if a.fs != b.fs && !strings.HasPrefix(b.fs, a.fs+"/") {
				return false
			}
			log.Printf("asking to sync path twice: %s\n", b.fs)
			return true
		})
	} else {
		// Keep just the basenames.
		for _, path := range roots {
			if len(path.db) > 0 {
				path.db = path.db[len(path.db)-1:]
			}
		}

		// Different filesystem paths mapping to the same DB location
		// are definitely a problem we would like to avoid,
		// otherwise we don't care.
		slices.SortFunc(roots, func(a, b *syncPath) int {
			return slices.Compare(a.db, b.db)
		})
		for i := 1; i < len(roots); i++ {
			if slices.Equal(roots[i-1].db, roots[i].db) {
				return nil, fmt.Errorf("duplicate root: %v", roots[i].db)
			}
		}
	}
	return
}

// disposeCTE, for the subtree rooted at the bound node ID, computes:
// "children", all nodes within the subtree (level 1 being the root itself),
// and "orphaned", the image hashes whose references all lie inside the
// subtree (HAVING count = total), together with a representative path each.
const disposeCTE = `WITH RECURSIVE
	root(id, sha1, parent, path) AS (
		SELECT id, sha1, parent, name FROM node WHERE id = ?
		UNION ALL
		SELECT r.id, r.sha1, n.parent, n.name || '/' || r.path
		FROM node AS n JOIN root AS r ON n.id = r.parent
	),
	children(id, sha1, path, level) AS (
		SELECT id, sha1, path, 1 FROM root WHERE parent IS NULL
		UNION ALL
		SELECT n.id, n.sha1, c.path || '/' || n.name, c.level + 1
		FROM node AS n JOIN children AS c ON n.parent = c.id
	),
	removed(sha1, count, path) AS (
		SELECT sha1, COUNT(*) AS count, MIN(path) AS path
		FROM children
		GROUP BY sha1
	),
	orphaned(sha1, path, count, total) AS (
		SELECT r.sha1, r.path, r.count, COUNT(*) AS total
		FROM removed AS r
		JOIN node ON node.sha1 = r.sha1
		GROUP BY node.sha1
		HAVING count = total
	)`

// cmdSync ensures the given (sub)roots are accurately reflected
// in the database.
+func cmdSync(fs *flag.FlagSet, args []string) error { + fullpaths := fs.Bool("fullpaths", false, "don't basename arguments") + if err := fs.Parse(args); err != nil { + return err + } + if fs.NArg() < 2 { + return errWrongUsage + } + if err := openDB(fs.Arg(0)); err != nil { + return err + } + + roots, err := syncResolveRoots(fs.Args()[1:], *fullpaths) + if err != nil { + return err + } + + ctx, stop := signal.NotifyContext(context.Background(), os.Interrupt) + defer stop() + + // In case of a failure during processing, the only retained side effects + // on the filesystem tree are: + // - Fixing dead symlinks to images. + // - Creating symlinks to images that aren't used by anything. + tx, err := db.BeginTx(ctx, nil) + if err != nil { + return err + } + defer tx.Rollback() + + // Mild hack: upgrade the transaction to a write one straight away, + // in order to rule out deadlocks (preventable failure). + if _, err := tx.Exec(`END TRANSACTION; + BEGIN IMMEDIATE TRANSACTION`); err != nil { + return err + } + + c := syncContext{ctx: ctx, tx: tx, pb: newProgressBar(-1), + linked: make(map[string]struct{})} + defer c.pb.Stop() + + if c.stmtOrphan, err = c.tx.Prepare(disposeCTE + ` + INSERT OR IGNORE INTO orphan(sha1, path) + SELECT sha1, path FROM orphaned`); err != nil { + return err + } + if c.stmtDisposeSub, err = c.tx.Prepare(disposeCTE + ` + DELETE FROM node WHERE id + IN (SELECT DISTINCT id FROM children WHERE level <> 1)`); err != nil { + return err + } + if c.stmtDisposeAll, err = c.tx.Prepare(disposeCTE + ` + DELETE FROM node WHERE id + IN (SELECT DISTINCT id FROM children)`); err != nil { + return err + } + + // Info tasks take a position in the task semaphore channel. + // then fill the info channel. + // + // Immediately after syncDequeue(), the info channel is empty, + // but the semaphore might be full. 
+ // + // By having at least one position in the info channel, + // we allow at least one info task to run to semaphore release, + // so that syncEnqueue() doesn't deadlock. + // + // By making it the same size as the semaphore, + // the end of this function doesn't need to dequeue while waiting. + // It also prevents goroutine leaks despite leaving them running-- + // once they finish their job, they're gone, + // and eventually the info channel would get garbage collected. + // + // The additional slot is there to handle the one result + // that may be placed while syncEnqueue() waits for the semaphore, + // i.e., it is for the result of the task that syncEnqueue() spawns. + c.info = make(chan syncFileInfo, cap(taskSemaphore)+1) + + for _, root := range roots { + if err := syncRoot(&c, root.db, root.fs); err != nil { + return err + } + } + return tx.Commit() +} + +// --- Removal ----------------------------------------------------------------- + +// cmdRemove is for manual removal of subtrees from the database. +// Beware that inputs are database, not filesystem paths. +func cmdRemove(fs *flag.FlagSet, args []string) error { + if err := fs.Parse(args); err != nil { + return err + } + if fs.NArg() < 2 { + return errWrongUsage + } + if err := openDB(fs.Arg(0)); err != nil { + return err + } + + tx, err := db.BeginTx(context.Background(), nil) + if err != nil { + return err + } + defer tx.Rollback() + + for _, path := range fs.Args()[1:] { + var id sql.NullInt64 + for _, name := range decodeWebPath(path) { + if err := tx.QueryRow(`SELECT id FROM node + WHERE parent IS ? 
AND name = ?`, + id, name).Scan(&id); err != nil { + return err + } + } + if id.Int64 == 0 { + return errors.New("can't remove root") + } + + if _, err = tx.Exec(disposeCTE+` + INSERT OR IGNORE INTO orphan(sha1, path) + SELECT sha1, path FROM orphaned`, id); err != nil { + return err + } + if _, err = tx.Exec(disposeCTE+` + DELETE FROM node WHERE id + IN (SELECT DISTINCT id FROM children)`, id); err != nil { + return err + } + } + return tx.Commit() +} + +// --- Tagging ----------------------------------------------------------------- + +// cmdTag mass imports tags from data passed on stdin as a TSV +// of SHA1 TAG WEIGHT entries. +func cmdTag(fs *flag.FlagSet, args []string) error { + if err := fs.Parse(args); err != nil { + return err + } + if fs.NArg() < 2 || fs.NArg() > 3 { + return errWrongUsage + } + if err := openDB(fs.Arg(0)); err != nil { + return err + } + + space := fs.Arg(1) + + var description sql.NullString + if fs.NArg() >= 3 { + description = sql.NullString{String: fs.Arg(2), Valid: true} + } + + // Note that starting as a write transaction prevents deadlocks. + // Imports are rare, and just bulk load data, so this scope is fine. + tx, err := db.Begin() + if err != nil { + return err + } + defer tx.Rollback() + + if _, err := tx.Exec(`INSERT OR IGNORE INTO tag_space(name, description) + VALUES (?, ?)`, space, description); err != nil { + return err + } + + var spaceID int64 + if err := tx.QueryRow(`SELECT id FROM tag_space WHERE name = ?`, + space).Scan(&spaceID); err != nil { + return err + } + + // XXX: It might make sense to pre-erase all tag assignments within + // the given space for that image, the first time we see it: + // + // DELETE FROM tag_assignment + // WHERE sha1 = ? AND tag IN (SELECT id FROM tag WHERE space = ?) 
+ // + // or even just clear the tag space completely: + // + // DELETE FROM tag_assignment + // WHERE tag IN (SELECT id FROM tag WHERE space = ?); + // DELETE FROM tag WHERE space = ?; + stmt, err := tx.Prepare(`INSERT INTO tag_assignment(sha1, tag, weight) + VALUES (?, (SELECT id FROM tag WHERE space = ? AND name = ?), ?) + ON CONFLICT DO UPDATE SET weight = ?`) + if err != nil { + return err + } + + scanner := bufio.NewScanner(os.Stdin) + for scanner.Scan() { + fields := strings.Split(scanner.Text(), "\t") + if len(fields) != 3 { + return errors.New("invalid input format") + } + + sha1, tag := fields[0], fields[1] + weight, err := strconv.ParseFloat(fields[2], 64) + if err != nil { + return err + } + + if _, err := tx.Exec( + `INSERT OR IGNORE INTO tag(space, name) VALUES (?, ?);`, + spaceID, tag); err != nil { + return nil + } + if _, err := stmt.Exec(sha1, spaceID, tag, weight, weight); err != nil { + log.Printf("%s: %s\n", sha1, err) + } + } + if err := scanner.Err(); err != nil { + return err + } + return tx.Commit() +} + +// --- Check ------------------------------------------------------------------- + +func isValidSHA1(hash string) bool { + if len(hash) != sha1.Size*2 || strings.ToLower(hash) != hash { + return false + } + if _, err := hex.DecodeString(hash); err != nil { + return false + } + return true +} + +func hashesToFileListing(root, suffix string, hashes []string) []string { + // Note that we're semi-duplicating {image,thumb}Path(). + paths := []string{root} + for _, hash := range hashes { + dir := filepath.Join(root, hash[:2]) + paths = append(paths, dir, filepath.Join(dir, hash+suffix)) + } + slices.Sort(paths) + return slices.Compact(paths) +} + +func collectFileListing(root string) (paths []string, err error) { + err = filepath.WalkDir(root, + func(path string, d fs.DirEntry, err error) error { + paths = append(paths, path) + return err + }) + + // Even though it should already be sorted somehow. 
+ slices.Sort(paths) + return +} + +func checkFiles(root, suffix string, hashes []string) (bool, []string, error) { + db := hashesToFileListing(root, suffix, hashes) + fs, err := collectFileListing(root) + if err != nil { + return false, nil, err + } + + iDB, iFS, ok, intersection := 0, 0, true, []string{} + for iDB < len(db) && iFS < len(fs) { + if db[iDB] == fs[iFS] { + intersection = append(intersection, db[iDB]) + iDB++ + iFS++ + } else if db[iDB] < fs[iFS] { + ok = false + fmt.Printf("only in DB: %s\n", db[iDB]) + iDB++ + } else { + ok = false + fmt.Printf("only in FS: %s\n", fs[iFS]) + iFS++ + } + } + for _, path := range db[iDB:] { + ok = false + fmt.Printf("only in DB: %s\n", path) + } + for _, path := range fs[iFS:] { + ok = false + fmt.Printf("only in FS: %s\n", path) + } + return ok, intersection, nil +} + +func checkHash(path string) (message string, err error) { + f, err := os.Open(path) + if err != nil { + return err.Error(), nil + } + defer f.Close() + + // We get 2 levels of parent directories in here, just filter them out. + if fi, err := f.Stat(); err != nil { + return err.Error(), nil + } else if fi.IsDir() { + return "", nil + } + + hash := sha1.New() + _, err = io.CopyBuffer(hash, f, make([]byte, 65536)) + if err != nil { + return err.Error(), nil + } + + sha1 := hex.EncodeToString(hash.Sum(nil)) + if sha1 != filepath.Base(path) { + return fmt.Sprintf("mismatch, found %s", sha1), nil + } + return "", nil +} + +func checkHashes(paths []string) (bool, error) { + log.Println("checking image hashes") + var failed atomic.Bool + err := parallelize(paths, func(path string) (string, error) { + message, err := checkHash(path) + if message != "" { + failed.Store(true) + } + return message, err + }) + return !failed.Load(), err +} + +// cmdCheck carries out various database consistency checks. 
func cmdCheck(fs *flag.FlagSet, args []string) error {
	full := fs.Bool("full", false, "verify image hashes")
	if err := fs.Parse(args); err != nil {
		return err
	}
	if fs.NArg() != 1 {
		return errWrongUsage
	}
	if err := openDB(fs.Arg(0)); err != nil {
		return err
	}

	// Check if hashes are in the right format.
	// NOTE(review): this pass only validates hash *format*; checkHashes()
	// below logs the very same message for the full content verification.
	log.Println("checking image hashes")

	allSHA1, err := dbCollectStrings(`SELECT sha1 FROM image`)
	if err != nil {
		return err
	}

	ok := true
	for _, hash := range allSHA1 {
		if !isValidSHA1(hash) {
			ok = false
			fmt.Printf("invalid image SHA1: %s\n", hash)
		}
	}

	// This is, rather obviously, just a strict subset.
	// Although it doesn't run in the same transaction.
	thumbSHA1, err := dbCollectStrings(`SELECT sha1 FROM image
		WHERE thumbw IS NOT NULL OR thumbh IS NOT NULL`)
	if err != nil {
		return err
	}

	// This somewhat duplicates {image,thumb}Path().
	log.Println("checking SQL against filesystem")
	okImages, intersection, err := checkFiles(
		filepath.Join(galleryDirectory, nameOfImageRoot), "", allSHA1)
	if err != nil {
		return err
	}

	okThumbs, _, err := checkFiles(
		filepath.Join(galleryDirectory, nameOfThumbRoot), ".webp", thumbSHA1)
	if err != nil {
		return err
	}
	if !okImages || !okThumbs {
		ok = false
	}

	// Images are symlinks into the original trees; Stat follows them.
	log.Println("checking for dead symlinks")
	for _, path := range intersection {
		if _, err := os.Stat(path); err != nil {
			ok = false
			fmt.Printf("%s: %s\n", path, err)
		}
	}

	if *full {
		if ok2, err := checkHashes(intersection); err != nil {
			return err
		} else if !ok2 {
			ok = false
		}
	}

	if !ok {
		return errors.New("detected inconsistencies")
	}
	return nil
}

// --- Thumbnailing ------------------------------------------------------------

// identifyThumbnail returns the pixel dimensions of a WebP thumbnail file.
func identifyThumbnail(path string) (w, h int, err error) {
	f, err := os.Open(path)
	if err != nil {
		return
	}
	defer f.Close()

	config, err := webp.DecodeConfig(f)
	if err != nil {
		return
	}
	return config.Width, config.Height, nil
}

// makeThumbnail produces a WebP thumbnail for pathImage at pathThumb and
// returns its dimensions. With load set, an existing readable thumbnail
// is reused instead of being regenerated.
func makeThumbnail(load bool, pathImage, pathThumb string) (
	w, h int, err error) {
	if load {
		if w, h, err = identifyThumbnail(pathThumb); err == nil {
			return
		}
	}

	thumbDirname, _ := filepath.Split(pathThumb)
	if err := os.MkdirAll(thumbDirname, 0755); err != nil {
		return 0, 0, err
	}

	// Create a normalized thumbnail. Since we don't particularly need
	// any complex processing, such as surrounding of metadata,
	// simply push it through ImageMagick.
	//
	//  - http://www.ericbrasseur.org/gamma.html
	//  - https://www.imagemagick.org/Usage/thumbnails/
	//  - https://imagemagick.org/script/command-line-options.php#layers
	//
	// "info:" output is written for each frame, which is why we delete
	// all of them but the first one beforehands.
	//
	// TODO: See if we can optimize resulting WebP animations.
	// (Do -layers optimize* apply to this format at all?)
	cmd := exec.Command("magick", "-limit", "thread", "1", pathImage,
		"-coalesce", "-colorspace", "RGB", "-auto-orient", "-strip",
		"-resize", "256x128>", "-colorspace", "sRGB",
		"-format", "%w %h", "+write", pathThumb, "-delete", "1--1", "info:")

	out, err := cmd.Output()
	if err != nil {
		return 0, 0, err
	}

	_, err = fmt.Fscanf(bytes.NewReader(out), "%d %d", &w, &h)
	return w, h, err
}

// cmdThumbnail generates missing thumbnails, in parallel.
func cmdThumbnail(fs *flag.FlagSet, args []string) error {
	load := fs.Bool("load", false, "try to load existing thumbnail files")
	if err := fs.Parse(args); err != nil {
		return err
	}
	if fs.NArg() < 1 {
		return errWrongUsage
	}
	if err := openDB(fs.Arg(0)); err != nil {
		return err
	}

	hexSHA1 := fs.Args()[1:]
	if len(hexSHA1) == 0 {
		// Get all unique images in the database with no thumbnail.
		var err error
		hexSHA1, err = dbCollectStrings(`SELECT sha1 FROM image
			WHERE thumbw IS NULL OR thumbh IS NULL`)
		if err != nil {
			return err
		}
	}

	stmt, err := db.Prepare(
		`UPDATE image SET thumbw = ?, thumbh = ? WHERE sha1 = ?`)
	if err != nil {
		return err
	}
	defer stmt.Close()

	// The mutex serializes writes through the single prepared statement.
	var mu sync.Mutex
	return parallelize(hexSHA1, func(sha1 string) (message string, err error) {
		pathImage := imagePath(sha1)
		pathThumb := thumbPath(sha1)
		w, h, err := makeThumbnail(*load, pathImage, pathThumb)
		if err != nil {
			if ee, ok := err.(*exec.ExitError); ok {
				return string(ee.Stderr), nil
			}
			return "", err
		}

		mu.Lock()
		defer mu.Unlock()
		_, err = stmt.Exec(w, h, sha1)
		return "", err
	})
}

// --- Perceptual hash ---------------------------------------------------------

// linearImage wraps an image so that pixel reads come back decoded from
// sRGB into linear values, for gamma-correct scaling.
type linearImage struct {
	img image.Image
}

func newLinearImage(img image.Image) *linearImage {
	return &linearImage{img: img}
}

func (l *linearImage) ColorModel() color.Model { return l.img.ColorModel() }
func (l *linearImage) Bounds() image.Rectangle { return l.img.Bounds() }

// unSRGB converts a 16-bit sRGB channel value to an 8-bit linear one.
func unSRGB(c uint32) uint8 {
	n := float64(c) / 0xffff
	if n <= 0.04045 {
		return uint8(n * (255.0 / 12.92))
	}
	return uint8(math.Pow((n+0.055)/(1.055), 2.4) * 255.0)
}

func (l *linearImage) At(x, y int) color.Color {
	r, g, b, a := l.img.At(x, y).RGBA()
	return color.RGBA{
		R: unSRGB(r), G: unSRGB(g), B: unSRGB(b), A: uint8(a >> 8)}
}

// - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -

// isWebPAnimation returns whether the given ReadSeeker starts a WebP animation.
+// See https://developers.google.com/speed/webp/docs/riff_container +func isWebPAnimation(rs io.ReadSeeker) (bool, error) { + b := make([]byte, 21) + if _, err := rs.Read(b); err != nil { + return false, err + } + if _, err := rs.Seek(0, io.SeekStart); err != nil { + return false, err + } + + return bytes.Equal(b[:4], []byte("RIFF")) && + bytes.Equal(b[8:16], []byte("WEBPVP8X")) && + b[20]&0b00000010 != 0, nil +} + +var errIsAnimation = errors.New("cannot perceptually hash animations") + +func dhashWebP(rs io.ReadSeeker) (uint64, error) { + if a, err := isWebPAnimation(rs); err != nil { + return 0, err + } else if a { + return 0, errIsAnimation + } + + // Doing this entire thing in Go is SLOW, but convenient. + source, err := webp.Decode(rs) + if err != nil { + return 0, err + } + + var ( + linear = newLinearImage(source) + resized = image.NewNRGBA64(image.Rect(0, 0, 9, 8)) + ) + draw.CatmullRom.Scale(resized, resized.Bounds(), + linear, linear.Bounds(), draw.Src, nil) + + var hash uint64 + for y := 0; y < 8; y++ { + var grey [9]float32 + for x := 0; x < 9; x++ { + rgba := resized.NRGBA64At(x, y) + grey[x] = 0.2126*float32(rgba.R) + + 0.7152*float32(rgba.G) + + 0.0722*float32(rgba.B) + } + + var row uint64 + if grey[0] < grey[1] { + row |= 1 << 7 + } + if grey[1] < grey[2] { + row |= 1 << 6 + } + if grey[2] < grey[3] { + row |= 1 << 5 + } + if grey[3] < grey[4] { + row |= 1 << 4 + } + if grey[4] < grey[5] { + row |= 1 << 3 + } + if grey[5] < grey[6] { + row |= 1 << 2 + } + if grey[6] < grey[7] { + row |= 1 << 1 + } + if grey[7] < grey[8] { + row |= 1 << 0 + } + hash = hash<<8 | row + } + return hash, nil +} + +// - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - + +func makeDhash(sha1 string) (uint64, error) { + pathThumb := thumbPath(sha1) + f, err := os.Open(pathThumb) + if err != nil { + return 0, err + } + defer f.Close() + return dhashWebP(f) +} + +// cmdDhash computes perceptual hashes from thumbnails. 
func cmdDhash(fs *flag.FlagSet, args []string) error {
	if err := fs.Parse(args); err != nil {
		return err
	}
	if fs.NArg() < 1 {
		return errWrongUsage
	}
	if err := openDB(fs.Arg(0)); err != nil {
		return err
	}

	// With no explicit hashes given, process all thumbnailed images
	// that don't have a dhash yet.
	hexSHA1 := fs.Args()[1:]
	if len(hexSHA1) == 0 {
		var err error
		hexSHA1, err = dbCollectStrings(`SELECT sha1 FROM image
			WHERE thumbw IS NOT NULL AND thumbh IS NOT NULL AND dhash IS NULL`)
		if err != nil {
			return err
		}
	}

	stmt, err := db.Prepare(`UPDATE image SET dhash = ? WHERE sha1 = ?`)
	if err != nil {
		return err
	}
	defer stmt.Close()

	// The mutex serializes writes through the single prepared statement.
	var mu sync.Mutex
	return parallelize(hexSHA1, func(sha1 string) (message string, err error) {
		hash, err := makeDhash(sha1)
		if errors.Is(err, errIsAnimation) {
			// Ignoring this common condition.
			return "", nil
		} else if err != nil {
			return err.Error(), nil
		}

		mu.Lock()
		defer mu.Unlock()
		// SQLite stores integers signed, hence the int64 conversion.
		_, err = stmt.Exec(int64(hash), sha1)
		return "", err
	})
}

// --- Main --------------------------------------------------------------------

// errWrongUsage makes main() print the command's usage and exit with 2.
var errWrongUsage = errors.New("wrong usage")

// commands maps subcommand names to their handlers; in the usage strings,
// GD stands for the gallery directory, which every command takes first.
var commands = map[string]struct {
	handler  func(*flag.FlagSet, []string) error
	usage    string
	function string
}{
	"init":      {cmdInit, "GD", "Initialize a database."},
	"web":       {cmdWeb, "GD ADDRESS", "Launch a web interface."},
	"tag":       {cmdTag, "GD SPACE [DESCRIPTION]", "Import tags."},
	"sync":      {cmdSync, "GD ROOT...", "Synchronise with the filesystem."},
	"remove":    {cmdRemove, "GD PATH...", "Remove database subtrees."},
	"check":     {cmdCheck, "GD", "Run consistency checks."},
	"thumbnail": {cmdThumbnail, "GD [SHA1...]", "Generate thumbnails."},
	"dhash":     {cmdDhash, "GD [SHA1...]", "Compute perceptual hashes."},
}

// usage prints a summary of global flags and all known commands.
func usage() {
	f := flag.CommandLine.Output()
	fmt.Fprintf(f, "Usage: %s COMMAND [ARG...]\n", os.Args[0])
	flag.PrintDefaults()

	// The alphabetic ordering is unfortunate, but tolerable.
	keys := []string{}
	for key := range commands {
		keys = append(keys, key)
	}
	sort.Strings(keys)

	fmt.Fprintf(f, "\nCommands:\n")
	for _, key := range keys {
		fmt.Fprintf(f, "  %s [OPTION...] %s\n    \t%s\n",
			key, commands[key].usage, commands[key].function)
	}
}

func main() {
	// This implements the -h switch for us by default.
	// The rest of the handling here closely follows what flag does internally.
	flag.Usage = usage
	flag.Parse()
	if flag.NArg() < 1 {
		flag.Usage()
		os.Exit(2)
	}

	cmd, ok := commands[flag.Arg(0)]
	if !ok {
		fmt.Fprintf(flag.CommandLine.Output(),
			"unknown command: %s\n", flag.Arg(0))
		flag.Usage()
		os.Exit(2)
	}

	fs := flag.NewFlagSet(flag.Arg(0), flag.ExitOnError)
	fs.Usage = func() {
		fmt.Fprintf(fs.Output(),
			"Usage: %s [OPTION...] %s\n%s\n",
			fs.Name(), cmd.usage, cmd.function)
		fs.PrintDefaults()
	}

	taskSemaphore = newSemaphore(runtime.NumCPU())
	err := cmd.handler(fs, flag.Args()[1:])

	// Note that the database object has a closing finalizer,
	// we just additionally print any errors coming from there.
	if db != nil {
		if err := db.Close(); err != nil {
			log.Println(err)
		}
	}

	if errors.Is(err, errWrongUsage) {
		fs.Usage()
		os.Exit(2)
	} else if err != nil {
		log.Fatalln(err)
	}
}