package main
import (
"bufio"
"bytes"
"context"
"crypto/sha1"
"database/sql"
"encoding/hex"
"encoding/json"
"errors"
"fmt"
"html/template"
"image"
"image/color"
"io"
"io/fs"
"log"
"math"
"math/bits"
"net"
"net/http"
"os"
"os/exec"
"os/signal"
"path/filepath"
"regexp"
"runtime"
"slices"
"sort"
"strconv"
"strings"
"sync"
"time"
"github.com/mattn/go-sqlite3"
"golang.org/x/image/draw"
"golang.org/x/image/webp"
)
// Package-level state shared by all commands; db and galleryDirectory
// are assigned by openDB.
var (
	db               *sql.DB // sqlite database
	galleryDirectory string  // gallery directory
	// taskSemaphore limits parallel computations.
	taskSemaphore semaphore
)
// Names of the entries found directly within a gallery directory.
const (
	nameOfDB        = "gallery.db" // database file, opened by openDB
	nameOfImageRoot = "images"     // root of the paths built by imagePath
	nameOfThumbRoot = "thumbs"     // root of the paths built by thumbPath
)
// hammingDistance counts the bit positions in which a and b differ,
// treating both values as plain 64-bit patterns.
func hammingDistance(a, b int64) int {
	differing := uint64(a ^ b)
	return bits.OnesCount64(differing)
}
// init registers the "sqlite3_custom" database/sql driver. Each of its
// connections exposes hammingDistance to SQL under the name "hamming".
func init() {
	registerHamming := func(conn *sqlite3.SQLiteConn) error {
		// The function is pure: its result depends on its arguments only.
		const pure = true
		return conn.RegisterFunc("hamming", hammingDistance, pure)
	}
	sql.Register("sqlite3_custom",
		&sqlite3.SQLiteDriver{ConnectHook: registerHamming})
}
// openDB points the global db handle at the database file within directory,
// using the "sqlite3_custom" driver, and remembers directory
// in galleryDirectory. Both globals are assigned even when opening fails.
func openDB(directory string) error {
	const options = "?_foreign_keys=1&_busy_timeout=1000"
	dsn := "file:" + filepath.Join(directory, nameOfDB+options)

	handle, err := sql.Open("sqlite3_custom", dsn)
	db, galleryDirectory = handle, directory
	return err
}
// imagePath returns where the image with the given hex digest is kept
// within the gallery directory. Images are sharded into subdirectories
// named by the first two characters of the digest, so sha1 must be
// at least two bytes long.
func imagePath(sha1 string) string {
	shard := sha1[:2]
	return filepath.Join(galleryDirectory, nameOfImageRoot, shard, sha1)
}
// thumbPath returns where the WebP thumbnail of the image with the given
// hex digest is kept within the gallery directory. Thumbnails are sharded
// by the first two characters of the digest, so sha1 must be at least
// two bytes long.
func thumbPath(sha1 string) string {
	shard, basename := sha1[:2], sha1+".webp"
	return filepath.Join(galleryDirectory, nameOfThumbRoot, shard, basename)
}
// dbCollectStrings runs query with arguments a on the global database,
// and returns the single string column of every resulting row.
// The result is non-nil, even if empty, unless an error is returned.
func dbCollectStrings(query string, a ...any) ([]string, error) {
	rows, err := db.Query(query, a...)
	if err != nil {
		return nil, err
	}
	defer rows.Close()

	collected := []string{}
	for rows.Next() {
		var value string
		if scanErr := rows.Scan(&value); scanErr != nil {
			return nil, scanErr
		}
		collected = append(collected, value)
	}
	if iterErr := rows.Err(); iterErr != nil {
		return nil, iterErr
	}
	return collected, nil
}
// --- Semaphore ---------------------------------------------------------------
// semaphore is a counting semaphore built on a buffered channel:
// every buffered element represents one held slot.
type semaphore chan struct{}

// newSemaphore creates a semaphore with room for size concurrent holders.
func newSemaphore(size int) semaphore {
	return make(semaphore, size)
}

// release frees one slot previously taken by acquire.
func (s semaphore) release() {
	<-s
}

// acquire blocks until it takes a slot, or until ctx is done,
// in which case it returns the context's error and holds nothing.
func (s semaphore) acquire(ctx context.Context) error {
	select {
	case s <- struct{}{}:
		// select chooses randomly among ready cases, so the context may
		// be done already; give priority to context cancellation.
		if err := ctx.Err(); err != nil {
			s.release()
			return err
		}
		return nil
	case <-ctx.Done():
		return ctx.Err()
	}
}
// --- Progress bar ------------------------------------------------------------
// progressBar prints a textual progress counter to standard output.
// The embedded mutex guards Step and Interrupt; Stop and Update
// do not take it themselves.
type progressBar struct {
	sync.Mutex
	current int // number of finished steps
	target  int // total number of steps, negative if unknown
}

// newProgressBar creates a progress bar for target steps,
// and prints its initial state right away.
func newProgressBar(target int) *progressBar {
	pb := new(progressBar)
	pb.target = target
	pb.Update()
	return pb
}

// Stop moves output to the next line, leaving the last state visible.
func (pb *progressBar) Stop() {
	// The minimum thing that works: just print a newline.
	os.Stdout.WriteString("\n")
}

// Update redraws the progress bar over the current terminal line.
func (pb *progressBar) Update() {
	if pb.target < 0 {
		// The total is unknown, so no percentage can be shown.
		fmt.Printf("\r%d/?", pb.current)
		return
	}

	percent := 0
	if pb.target != 0 {
		percent = int(float32(pb.current) / float32(pb.target) * 100)
	}

	// Pad the current value to the width of the total,
	// so that the output doesn't jump around.
	total := fmt.Sprint(pb.target)
	fmt.Printf("\r%*d/%s (%2d%%)", len(total), pb.current, total, percent)
}

// Step records one finished step, and redraws the progress bar.
func (pb *progressBar) Step() {
	pb.Lock()
	defer pb.Unlock()

	pb.current++
	pb.Update()
}

// Interrupt runs callback, which may print its own output, in between
// finishing the progress bar's line and drawing the bar anew.
func (pb *progressBar) Interrupt(callback func()) {
	pb.Lock()
	defer pb.Unlock()

	pb.Stop()
	defer pb.Update()
	callback()
}
// --- Parallelization ---------------------------------------------------------
// parallelFunc processes a single item. A non-empty message gets logged
// without stopping the run, while a non-nil error cancels the whole run.
type parallelFunc func(item string) (message string, err error)

// parallelize runs the callback in parallel on a list of strings,
// reporting progress and any non-fatal messages.
//
// The number of concurrently running callbacks is bounded by taskSemaphore.
// Once a callback fails, no further items are started; the function waits
// for callbacks that are already running, and returns the first error
// that was reported. It returns nil if all callbacks succeeded.
//
// Note that the "strings" parameter shadows the package of the same name
// within this function.
func parallelize(strings []string, callback parallelFunc) error {
	pb := newProgressBar(len(strings))
	defer pb.Stop()

	ctx, cancel := context.WithCancelCause(context.Background())
	// Release the context's resources on the success path as well.
	// Cancelling an already cancelled context doesn't change its cause.
	defer cancel(nil)

	var wg sync.WaitGroup
	for _, item := range strings {
		// This only fails once the context has been cancelled.
		if taskSemaphore.acquire(ctx) != nil {
			break
		}

		wg.Add(1)
		go func(item string) {
			defer taskSemaphore.release()
			defer wg.Done()
			if message, err := callback(item); err != nil {
				cancel(err)
			} else if message != "" {
				pb.Interrupt(func() { log.Printf("%s: %s\n", item, message) })
			}
			pb.Step()
		}(item)
	}
	wg.Wait()

	// The deferred cancel hasn't run yet, so an error here
	// can only come from a failed callback.
	if ctx.Err() != nil {
		return context.Cause(ctx)
	}
	return nil
}
// --- Initialization ----------------------------------------------------------
// cmdInit initializes a "gallery directory": it executes initializeSQL
// against the gallery.db database within, and creates the images
// and thumbs subdirectories. It expects the directory as its sole argument.
func cmdInit(args []string) error {
	if len(args) != 1 {
		return errors.New("usage: GD")
	}
	if err := openDB(args[0]); err != nil {
		return err
	}
	if _, err := db.Exec(initializeSQL); err != nil {
		return err
	}

	// XXX: There's technically no reason to keep images as symlinks,
	// we might just keep absolute paths in the database as well.
	for _, root := range []string{nameOfImageRoot, nameOfThumbRoot} {
		directory := filepath.Join(galleryDirectory, root)
		if err := os.MkdirAll(directory, 0755); err != nil {
			return err
		}
	}
	return nil
}
// --- Web ---------------------------------------------------------------------
// hashRE matches URL paths that start with a slash, and end with a slash
// followed by exactly 40 lowercase hexadecimal digits, which it captures.
var hashRE = regexp.MustCompile(`^/.*?/([0-9a-f]{40})$`)

// staticHandler serves any requests for paths other than the root,
// see handleRequest. It is set up by cmdWeb.
var staticHandler http.Handler

// page is the template that handleRequest renders for the root path.
var page = template.Must(template.New("/").Parse(`
Gallery
`))
func handleRequest(w http.ResponseWriter, r *http.Request) {
if r.URL.Path != "/" {
staticHandler.ServeHTTP(w, r)
return
}
if err := page.Execute(w, nil); err != nil {
log.Println(err)
}
}
// handleImages serves the image file whose digest concludes the URL path,
// responding with 404 to paths that hashRE doesn't match.
func handleImages(w http.ResponseWriter, r *http.Request) {
	match := hashRE.FindStringSubmatch(r.URL.Path)
	if match == nil {
		http.NotFound(w, r)
		return
	}
	http.ServeFile(w, r, imagePath(match[1]))
}
// handleThumbs serves the thumbnail of the image whose digest concludes
// the URL path, responding with 404 to paths that hashRE doesn't match.
func handleThumbs(w http.ResponseWriter, r *http.Request) {
	match := hashRE.FindStringSubmatch(r.URL.Path)
	if match == nil {
		http.NotFound(w, r)
		return
	}
	http.ServeFile(w, r, thumbPath(match[1]))
}
// - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
// getSubdirectories returns the names of directory nodes, meaning those
// without a sha1, located directly below parent. A parent of zero stands
// for the root, which has no node of its own.
//
// The query runs within tx, so that it observes the same state
// of the database as the caller's other queries do.
// The result is non-nil, even if empty, unless an error is returned.
func getSubdirectories(tx *sql.Tx, parent int64) (names []string, err error) {
	rows, err := tx.Query(`SELECT name FROM node
WHERE IFNULL(parent, 0) = ? AND sha1 IS NULL`, parent)
	if err != nil {
		return nil, err
	}
	defer rows.Close()

	names = []string{}
	for rows.Next() {
		var name string
		if err := rows.Scan(&name); err != nil {
			return nil, err
		}
		names = append(names, name)
	}
	if err := rows.Err(); err != nil {
		return nil, err
	}
	return names, nil
}
// webEntry describes one image node within a directory,
// as filled in by getSubentries, and encoded to JSON.
type webEntry struct {
	SHA1     string `json:"sha1"`     // image.sha1
	Name     string `json:"name"`     // node.name
	Modified int64  `json:"modified"` // node.mtime
	ThumbW   int64  `json:"thumbW"`   // image.thumbw, or zero when NULL
	ThumbH   int64  `json:"thumbH"`   // image.thumbh, or zero when NULL
}
// getSubentries returns all image nodes whose parent is the given node,
// together with the thumbnail dimensions of their images.
// The result is non-nil, even if empty, unless the query or a scan fails.
func getSubentries(tx *sql.Tx, parent int64) (entries []webEntry, err error) {
	rows, err := tx.Query(`
SELECT i.sha1, n.name, n.mtime, IFNULL(i.thumbw, 0), IFNULL(i.thumbh, 0)
FROM node AS n
JOIN image AS i ON n.sha1 = i.sha1
WHERE n.parent = ?`, parent)
	if err != nil {
		return nil, err
	}
	defer rows.Close()

	entries = make([]webEntry, 0)
	for rows.Next() {
		var entry webEntry
		scanErr := rows.Scan(&entry.SHA1, &entry.Name,
			&entry.Modified, &entry.ThumbW, &entry.ThumbH)
		if scanErr != nil {
			return nil, scanErr
		}
		entries = append(entries, entry)
	}
	return entries, rows.Err()
}
func handleAPIBrowse(w http.ResponseWriter, r *http.Request) {
var params struct {
Path string
}
if err := json.NewDecoder(r.Body).Decode(¶ms); err != nil {
http.Error(w, err.Error(), http.StatusBadRequest)
return
}
var result struct {
Subdirectories []string `json:"subdirectories"`
Entries []webEntry `json:"entries"`
}
tx, err := db.Begin()
if err != nil {
http.Error(w, err.Error(), http.StatusInternalServerError)
return
}
defer tx.Rollback()
parent, err := idForPath(tx, decodeWebPath(params.Path), false)
if err != nil {
http.Error(w, err.Error(), http.StatusNotFound)
return
}
result.Subdirectories, err = getSubdirectories(tx, parent)
if err != nil {
http.Error(w, err.Error(), http.StatusInternalServerError)
return
}
result.Entries, err = getSubentries(tx, parent)
if err != nil {
http.Error(w, err.Error(), http.StatusInternalServerError)
return
}
if err := json.NewEncoder(w).Encode(result); err != nil {
log.Println(err)
}
}
// - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
// webTagNamespace describes one tag namespace, as filled in
// by getTagNamespaces, and encoded to JSON.
type webTagNamespace struct {
	Description string           `json:"description"` // empty when NULL
	Tags        map[string]int64 `json:"tags"`        // see getTags
}
// getTags maps the names of all tags within the tag namespace nsID
// to the number of tag assignments that reference them.
// Tags without any assignments are included, with a count of zero.
func getTags(nsID int64) (result map[string]int64, err error) {
	rows, err := db.Query(`
SELECT t.name, COUNT(ta.tag) AS count
FROM tag AS t
LEFT JOIN tag_assignment AS ta ON t.id = ta.tag
WHERE t.space = ?
GROUP BY t.id`, nsID)
	if err != nil {
		return nil, err
	}
	defer rows.Close()

	result = map[string]int64{}
	for rows.Next() {
		var (
			tag   string
			count int64
		)
		if err = rows.Scan(&tag, &count); err != nil {
			return result, err
		}
		result[tag] = count
	}
	return result, rows.Err()
}
// getTagNamespaces returns tag namespaces by name, along with their tags.
// When match is not nil, only the namespace of that name is considered.
func getTagNamespaces(match *string) (
	result map[string]webTagNamespace, err error) {
	// The filter is optional, so it's only appended when needed.
	query := `SELECT id, name, IFNULL(description, '')
FROM tag_space`
	var args []any
	if match != nil {
		query += ` WHERE name = ?`
		args = append(args, *match)
	}

	var rows *sql.Rows
	if rows, err = db.Query(query, args...); err != nil {
		return nil, err
	}
	defer rows.Close()

	result = map[string]webTagNamespace{}
	for rows.Next() {
		var (
			id   int64
			name string
			ns   webTagNamespace
		)
		if err = rows.Scan(&id, &name, &ns.Description); err != nil {
			return result, err
		}
		if ns.Tags, err = getTags(id); err != nil {
			return result, err
		}
		result[name] = ns
	}
	return result, rows.Err()
}
func handleAPITags(w http.ResponseWriter, r *http.Request) {
var params struct {
Namespace *string
}
if err := json.NewDecoder(r.Body).Decode(¶ms); err != nil {
http.Error(w, err.Error(), http.StatusBadRequest)
return
}
result, err := getTagNamespaces(params.Namespace)
if err != nil {
http.Error(w, err.Error(), http.StatusInternalServerError)
return
}
if err := json.NewEncoder(w).Encode(result); err != nil {
log.Println(err)
}
}
// - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
// getImageDimensions looks up the width and height recorded for an image.
// Scanning fails with sql.ErrNoRows when there is no such image.
func getImageDimensions(sha1 string) (w int64, h int64, err error) {
	row := db.QueryRow(`SELECT width, height FROM image WHERE sha1 = ?`,
		sha1)
	err = row.Scan(&w, &h)
	return w, h, err
}
// getImagePaths returns slash-separated paths of all nodes that refer
// to the image. Paths are assembled by walking from each such node
// up through its parents, until a node without a parent is reached.
// The result is non-nil, even if empty, unless the query or a scan fails.
func getImagePaths(sha1 string) (paths []string, err error) {
	rows, err := db.Query(`WITH RECURSIVE paths(parent, path) AS (
SELECT parent, name AS path FROM node WHERE sha1 = ?
UNION ALL
SELECT n.parent, n.name || '/' || p.path
FROM node AS n JOIN paths AS p ON n.id = p.parent
) SELECT path FROM paths WHERE parent IS NULL`, sha1)
	if err != nil {
		return nil, err
	}
	defer rows.Close()

	paths = make([]string, 0)
	for rows.Next() {
		var full string
		if scanErr := rows.Scan(&full); scanErr != nil {
			return nil, scanErr
		}
		paths = append(paths, full)
	}
	return paths, rows.Err()
}
// getImageTags returns the weights of all tags assigned to the image,
// keyed by tag namespace name first, and by tag name second.
func getImageTags(sha1 string) (map[string]map[string]float32, error) {
	rows, err := db.Query(`
SELECT ts.name, t.name, ta.weight FROM tag_assignment AS ta
JOIN tag AS t ON t.id = ta.tag
JOIN tag_space AS ts ON ts.id = t.space
WHERE ta.sha1 = ?`, sha1)
	if err != nil {
		return nil, err
	}
	defer rows.Close()

	spaces := map[string]map[string]float32{}
	for rows.Next() {
		var (
			space, tag string
			weight     float32
		)
		if scanErr := rows.Scan(&space, &tag, &weight); scanErr != nil {
			return nil, scanErr
		}

		// Namespaces are created upon first encountering them.
		weights, known := spaces[space]
		if !known {
			weights = map[string]float32{}
			spaces[space] = weights
		}
		weights[tag] = weight
	}
	return spaces, rows.Err()
}
func handleAPIInfo(w http.ResponseWriter, r *http.Request) {
var params struct {
SHA1 string
}
if err := json.NewDecoder(r.Body).Decode(¶ms); err != nil {
http.Error(w, err.Error(), http.StatusBadRequest)
return
}
var result struct {
Width int64 `json:"width"`
Height int64 `json:"height"`
Paths []string `json:"paths"`
Tags map[string]map[string]float32 `json:"tags"`
}
var err error
result.Width, result.Height, err = getImageDimensions(params.SHA1)
if err != nil {
http.Error(w, err.Error(), http.StatusInternalServerError)
return
}
result.Paths, err = getImagePaths(params.SHA1)
if err != nil {
http.Error(w, err.Error(), http.StatusInternalServerError)
return
}
result.Tags, err = getImageTags(params.SHA1)
if err != nil {
http.Error(w, err.Error(), http.StatusInternalServerError)
return
}
if err := json.NewEncoder(w).Encode(result); err != nil {
log.Println(err)
}
}
// - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
// webSimilarImage describes an image in the context of similarity search,
// as filled in by getSimilar and handleAPISimilar, and encoded to JSON.
type webSimilarImage struct {
	SHA1        string   `json:"sha1"`
	PixelsRatio float32  `json:"pixelsRatio"` // pixel count relative to the queried image
	ThumbW      int64    `json:"thumbW"`      // image.thumbw, or zero when NULL
	ThumbH      int64    `json:"thumbH"`      // image.thumbh, or zero when NULL
	Paths       []string `json:"paths"`       // see getImagePaths
}
// getSimilar returns all images other than sha1 whose dhash lies
// at exactly the given Hamming distance from dhash. The pixels argument
// is the pixel count of the reference image, against which
// the PixelsRatio of each match is computed.
// The result is non-nil, even if empty, unless an error occurs.
func getSimilar(sha1 string, dhash int64, pixels int64, distance int) (
	result []webSimilarImage, err error) {
	// For distance ∈ {0, 1}, this query is quite inefficient.
	// In exchange, it's generic.
	//
	// If there's a dhash, there should also be thumbnail dimensions,
	// yet missing ones are replaced with zeroes to be safe.
	rows, err := db.Query(`
SELECT sha1, width * height, IFNULL(thumbw, 0), IFNULL(thumbh, 0)
FROM image WHERE sha1 <> ? AND dhash IS NOT NULL
AND hamming(dhash, ?) = ?`, sha1, dhash, distance)
	if err != nil {
		return nil, err
	}
	defer rows.Close()

	result = make([]webSimilarImage, 0)
	for rows.Next() {
		var (
			candidate       webSimilarImage
			candidatePixels int64
		)
		scanErr := rows.Scan(&candidate.SHA1, &candidatePixels,
			&candidate.ThumbW, &candidate.ThumbH)
		if scanErr != nil {
			return nil, scanErr
		}

		paths, pathsErr := getImagePaths(candidate.SHA1)
		if pathsErr != nil {
			return nil, pathsErr
		}
		candidate.Paths = paths
		candidate.PixelsRatio = float32(candidatePixels) / float32(pixels)
		result = append(result, candidate)
	}
	return result, rows.Err()
}
// getSimilarGroups stores images at perceptual distances zero and one
// from the described image in output, under a descriptive key each.
// It stops at the first failure.
func getSimilarGroups(sha1 string, dhash int64, pixels int64,
	output map[string][]webSimilarImage) error {
	for _, distance := range []int{0, 1} {
		title := fmt.Sprintf("Perceptual distance %d", distance)
		group, err := getSimilar(sha1, dhash, pixels, distance)
		// The key is set even upon failure, to a nil group.
		output[title] = group
		if err != nil {
			return err
		}
	}
	return nil
}
func handleAPISimilar(w http.ResponseWriter, r *http.Request) {
var params struct {
SHA1 string
}
if err := json.NewDecoder(r.Body).Decode(¶ms); err != nil {
http.Error(w, err.Error(), http.StatusBadRequest)
return
}
var result struct {
Info webSimilarImage `json:"info"`
Groups map[string][]webSimilarImage `json:"groups"`
}
result.Info = webSimilarImage{SHA1: params.SHA1, PixelsRatio: 1}
if paths, err := getImagePaths(params.SHA1); err != nil {
http.Error(w, err.Error(), http.StatusInternalServerError)
return
} else {
result.Info.Paths = paths
}
var (
width, height int64
dhash sql.NullInt64
)
err := db.QueryRow(`
SELECT width, height, dhash, IFNULL(thumbw, 0), IFNULL(thumbh, 0)
FROM image WHERE sha1 = ?`, params.SHA1).Scan(&width, &height, &dhash,
&result.Info.ThumbW, &result.Info.ThumbH)
if err != nil {
http.Error(w, err.Error(), http.StatusInternalServerError)
return
}
result.Groups = make(map[string][]webSimilarImage)
if dhash.Valid {
if err := getSimilarGroups(
params.SHA1, dhash.Int64, width*height, result.Groups); err != nil {
http.Error(w, err.Error(), http.StatusInternalServerError)
return
}
}
if err := json.NewEncoder(w).Encode(result); err != nil {
log.Println(err)
}
}
// - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
// webDuplicateImage describes one image within a group of duplicates,
// as filled in by getDuplicatesSimilar and getDuplicates2,
// and encoded to JSON.
type webDuplicateImage struct {
	SHA1       string `json:"sha1"`
	ThumbW     int64  `json:"thumbW"`     // image.thumbw, or zero when NULL
	ThumbH     int64  `json:"thumbH"`     // image.thumbh, or zero when NULL
	Occurences int64  `json:"occurences"` // number of nodes referring to the image
}
// duplicatesCTE is a query prefix that defines two common table expressions:
//
//   - duplicated: dhash values shared by more than one image,
//   - multipathed: images referenced by more than one node, excluding
//     those whose dhash is already covered by duplicated.
//
// A hamming distance of zero (direct dhash match) will be more than sufficient.
const duplicatesCTE = `WITH
duplicated(dhash, count) AS (
SELECT dhash, COUNT(*) AS count FROM image
WHERE dhash IS NOT NULL
GROUP BY dhash HAVING count > 1
),
multipathed(sha1, count) AS (
SELECT n.sha1, COUNT(*) AS count FROM node AS n
JOIN image AS i ON i.sha1 = n.sha1
WHERE i.dhash IS NULL
OR i.dhash NOT IN (SELECT dhash FROM duplicated)
GROUP BY n.sha1 HAVING count > 1
)
`
// getDuplicatesSimilar executes stmt with dhash as its only argument,
// and collects the resulting rows as a group of duplicate images.
// The result is non-nil, even if empty, unless an error occurs.
func getDuplicatesSimilar(stmt *sql.Stmt, dhash int64) (
	result []webDuplicateImage, err error) {
	rows, err := stmt.Query(dhash)
	if err != nil {
		return nil, err
	}
	defer rows.Close()

	result = make([]webDuplicateImage, 0)
	for rows.Next() {
		var member webDuplicateImage
		scanErr := rows.Scan(&member.SHA1,
			&member.ThumbW, &member.ThumbH, &member.Occurences)
		if scanErr != nil {
			return nil, scanErr
		}
		result = append(result, member)
	}
	return result, rows.Err()
}
// getDuplicates1 appends one group of images to result for each dhash
// that is shared by multiple images, and returns the extended slice.
func getDuplicates1(result [][]webDuplicateImage) (
	[][]webDuplicateImage, error) {
	// Finds all referenced images with a particular dhash.
	members, err := db.Prepare(`
SELECT i.sha1, IFNULL(i.thumbw, 0), IFNULL(i.thumbh, 0),
COUNT(*) AS occurences
FROM image AS i
JOIN node AS n ON n.sha1 = i.sha1
WHERE i.dhash = ?
GROUP BY n.sha1`)
	if err != nil {
		return nil, err
	}
	defer members.Close()

	hashes, err := db.Query(duplicatesCTE + `SELECT dhash FROM duplicated`)
	if err != nil {
		return nil, err
	}
	defer hashes.Close()

	for hashes.Next() {
		var dhash int64
		if err := hashes.Scan(&dhash); err != nil {
			return nil, err
		}

		group, err := getDuplicatesSimilar(members, dhash)
		if err != nil {
			return nil, err
		}
		result = append(result, group)
	}
	return result, hashes.Err()
}
// getDuplicates2 appends a single-image group to result for each image
// listed by the "multipathed" part of duplicatesCTE,
// and returns the extended slice.
func getDuplicates2(result [][]webDuplicateImage) (
	[][]webDuplicateImage, error) {
	// Describes a particular image, counting the nodes that refer to it.
	describe, err := db.Prepare(`
SELECT i.sha1, IFNULL(i.thumbw, 0), IFNULL(i.thumbh, 0),
COUNT(*) AS occurences
FROM image AS i
JOIN node AS n ON n.sha1 = i.sha1
WHERE i.sha1 = ?
GROUP BY n.sha1`)
	if err != nil {
		return nil, err
	}
	defer describe.Close()

	hashes, err := db.Query(duplicatesCTE + `SELECT sha1 FROM multipathed`)
	if err != nil {
		return nil, err
	}
	defer hashes.Close()

	for hashes.Next() {
		var sha1 string
		if err := hashes.Scan(&sha1); err != nil {
			return nil, err
		}

		var single webDuplicateImage
		err := describe.QueryRow(sha1).Scan(&single.SHA1,
			&single.ThumbW, &single.ThumbH, &single.Occurences)
		if err != nil {
			return nil, err
		}
		result = append(result, []webDuplicateImage{single})
	}
	return result, hashes.Err()
}
func handleAPIDuplicates(w http.ResponseWriter, r *http.Request) {
var params struct{}
if err := json.NewDecoder(r.Body).Decode(¶ms); err != nil {
http.Error(w, err.Error(), http.StatusBadRequest)
return
}
var (
result = [][]webDuplicateImage{}
err error
)
if result, err = getDuplicates1(result); err != nil {
http.Error(w, err.Error(), http.StatusInternalServerError)
return
}
if result, err = getDuplicates2(result); err != nil {
http.Error(w, err.Error(), http.StatusInternalServerError)
return
}
if err := json.NewEncoder(w).Encode(result); err != nil {
log.Println(err)
}
}
// - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
// webOrphanImage describes an image in the context of orphan listings,
// as encoded to JSON.
type webOrphanImage struct {
	SHA1   string `json:"sha1"`
	ThumbW int64  `json:"thumbW"` // image.thumbw, or zero when NULL
	ThumbH int64  `json:"thumbH"` // image.thumbh, or zero when NULL
	Tags   int64  `json:"tags"`   // number of tag assignments
}

// webOrphan describes a record of the orphan table, see getOrphans.
type webOrphan struct {
	webOrphanImage
	LastPath string `json:"lastPath"` // orphan.path

	// Replacement is the image now found at LastPath, if any,
	// see getOrphanReplacement.
	Replacement *webOrphanImage `json:"replacement"`
}
// getOrphanReplacement looks for an image node at the given
// slash-separated path, and describes its image if there is one.
//
// It returns nil without an error when there's no replacement: when
// the path is empty, when the directory that should contain the node
// no longer exists, or when there is no image node of that name within.
func getOrphanReplacement(webPath string) (*webOrphanImage, error) {
	path := decodeWebPath(webPath)
	if len(path) == 0 {
		return nil, nil
	}
	dirname, basename := path[:len(path)-1], path[len(path)-1]

	// The transaction is only used for reading, and is always rolled back.
	tx, err := db.Begin()
	if err != nil {
		return nil, err
	}
	defer tx.Rollback()

	parent, err := idForPath(tx, dirname, false)
	if errors.Is(err, sql.ErrNoRows) {
		// The containing directory is gone, so nothing can have taken
		// the orphan's place. This mustn't fail the whole listing.
		return nil, nil
	} else if err != nil {
		return nil, err
	}

	// Run within the transaction, to observe the same state
	// of the database as the path lookup did.
	var image webOrphanImage
	err = tx.QueryRow(`SELECT i.sha1,
IFNULL(i.thumbw, 0), IFNULL(i.thumbh, 0), COUNT(ta.sha1) AS tags
FROM node AS n
JOIN image AS i ON n.sha1 = i.sha1
LEFT JOIN tag_assignment AS ta ON n.sha1 = ta.sha1
WHERE n.parent = ? AND n.name = ?
GROUP BY n.sha1`, parent, basename).Scan(
		&image.SHA1, &image.ThumbW, &image.ThumbH, &image.Tags)
	if errors.Is(err, sql.ErrNoRows) {
		return nil, nil
	} else if err != nil {
		return nil, err
	}
	return &image, nil
}
// getOrphans describes all records of the orphan table, including
// any images that have taken their place, see getOrphanReplacement.
// The result is non-nil, even if empty, unless an error occurs.
func getOrphans() (result []webOrphan, err error) {
	rows, err := db.Query(`SELECT o.sha1, o.path,
IFNULL(i.thumbw, 0), IFNULL(i.thumbh, 0), COUNT(ta.sha1) AS tags
FROM orphan AS o
JOIN image AS i ON o.sha1 = i.sha1
LEFT JOIN tag_assignment AS ta ON o.sha1 = ta.sha1
GROUP BY o.sha1`)
	if err != nil {
		return nil, err
	}
	defer rows.Close()

	result = make([]webOrphan, 0)
	for rows.Next() {
		var record webOrphan
		scanErr := rows.Scan(&record.SHA1, &record.LastPath,
			&record.ThumbW, &record.ThumbH, &record.Tags)
		if scanErr != nil {
			return nil, scanErr
		}

		replacement, replacementErr := getOrphanReplacement(record.LastPath)
		if replacementErr != nil {
			return nil, replacementErr
		}
		record.Replacement = replacement
		result = append(result, record)
	}
	return result, rows.Err()
}
func handleAPIOrphans(w http.ResponseWriter, r *http.Request) {
var params struct{}
if err := json.NewDecoder(r.Body).Decode(¶ms); err != nil {
http.Error(w, err.Error(), http.StatusBadRequest)
return
}
result, err := getOrphans()
if err != nil {
http.Error(w, err.Error(), http.StatusInternalServerError)
return
}
if err := json.NewEncoder(w).Encode(result); err != nil {
log.Println(err)
}
}
// - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
// cmdWeb runs a web UI against GD on ADDRESS.
// It only returns once the HTTP server has stopped, with its error.
func cmdWeb(args []string) error {
	if len(args) != 2 {
		return errors.New("usage: GD ADDRESS")
	}
	if err := openDB(args[0]); err != nil {
		return err
	}
	address := args[1]

	// This separation is not strictly necessary,
	// but having an elementary level of security doesn't hurt either.
	staticHandler = http.FileServer(http.Dir("public"))

	// All routes are registered with the default serve mux,
	// which the server below uses for want of an explicit handler.
	routes := []struct {
		pattern string
		handler func(http.ResponseWriter, *http.Request)
	}{
		{"/", handleRequest},
		{"/image/", handleImages},
		{"/thumb/", handleThumbs},
		{"/api/browse", handleAPIBrowse},
		{"/api/tags", handleAPITags},
		{"/api/duplicates", handleAPIDuplicates},
		{"/api/orphans", handleAPIOrphans},
		{"/api/info", handleAPIInfo},
		{"/api/similar", handleAPISimilar},
	}
	for _, route := range routes {
		http.HandleFunc(route.pattern, route.handler)
	}

	// Tell the user where to point their browser.
	switch host, port, err := net.SplitHostPort(address); {
	case err != nil:
		log.Println(err)
	case host == "":
		log.Println("http://" + net.JoinHostPort("localhost", port))
	default:
		log.Println("http://" + address)
	}

	server := http.Server{
		Addr:           address,
		ReadTimeout:    60 * time.Second,
		WriteTimeout:   60 * time.Second,
		MaxHeaderBytes: 32 << 10,
	}
	return server.ListenAndServe()
}
// --- Import ------------------------------------------------------------------
// idForPath resolves a path, given as a list of directory names starting
// at the root, to the ID of the directory node it leads to.
// The empty path resolves to zero, which stands for the root.
//
// Missing directory nodes are inserted when create is true,
// otherwise they make the function fail with sql.ErrNoRows.
func idForPath(tx *sql.Tx, path []string, create bool) (int64, error) {
	// The root has no node, its children have a NULL parent.
	var current sql.NullInt64
	for _, crumb := range path {
		err := tx.QueryRow(`SELECT id FROM node
WHERE parent IS ? AND name = ? AND sha1 IS NULL`,
			current, crumb).Scan(&current)
		if err == nil {
			continue
		}
		if !errors.Is(err, sql.ErrNoRows) || !create {
			return 0, err
		}

		// This fails when trying to override a leaf node.
		// That needs special handling.
		inserted, err := tx.Exec(
			`INSERT INTO node(parent, name) VALUES (?, ?)`,
			current, crumb)
		if err != nil {
			return 0, err
		}
		id, err := inserted.LastInsertId()
		if err != nil {
			return 0, err
		}
		current = sql.NullInt64{Int64: id, Valid: true}
	}
	return current.Int64, nil
}
// decodeWebPath splits a slash-separated path into its non-empty components.
// The result is never nil, even for paths that have no components.
func decodeWebPath(path string) []string {
	// Relative paths could be handled differently,
	// but right now, they're assumed to start at the root.
	crumbs := []string{}
	for rest := path; rest != ""; {
		var crumb string
		crumb, rest, _ = strings.Cut(rest, "/")
		if crumb != "" {
			crumbs = append(crumbs, crumb)
		}
	}
	return crumbs
}
// - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
// directoryManager resolves directory paths to node IDs, creating any
// missing directory nodes, and remembering the results.
// Its zero value is ready for use.
type directoryManager struct {
	cache map[string]int64 // Unix-style paths to directory.id
}

// IDForDirectoryPath returns the ID of the directory node for path,
// creating nodes within tx as necessary. Paths without any components
// resolve to zero, which stands for the root, and are not cached.
func (dm *directoryManager) IDForDirectoryPath(
	tx *sql.Tx, path string) (int64, error) {
	path = filepath.ToSlash(filepath.Clean(path))
	crumbs := decodeWebPath(path)
	if len(crumbs) == 0 {
		return 0, nil
	}

	// The cache is created lazily, upon the first real lookup.
	if dm.cache == nil {
		dm.cache = make(map[string]int64)
	}
	if cached, ok := dm.cache[path]; ok {
		return cached, nil
	}

	id, err := idForPath(tx, crumbs, true)
	if err != nil {
		return 0, err
	}
	dm.cache[path] = id
	return id, nil
}
// isImage runs xdg-mime to find out the media type of the file at path,
// and reports whether that type starts with "image/".
func isImage(path string) (bool, error) {
	query := exec.Command("xdg-mime", "query", "filetype", path)
	mime, err := query.Output()
	if err != nil {
		return false, err
	}
	return bytes.HasPrefix(mime, []byte("image/")), nil
}
// pingImage runs ImageMagick's identify on the first frame of the image
// at path, and returns the width and height that it reports.
func pingImage(path string) (int, int, error) {
	args := []string{"-limit", "thread", "1", "-ping",
		"-format", "%w %h", path + "[0]"}
	out, err := exec.Command("identify", args...).Output()
	if err != nil {
		return 0, 0, err
	}

	var w, h int
	if _, err := fmt.Fscanf(bytes.NewReader(out), "%d %d", &w, &h); err != nil {
		return w, h, err
	}
	return w, h, nil
}
// importer adds individual files to the gallery, see Import.
// Its zero value is ready for use.
type importer struct {
	dm      directoryManager // resolves directory paths to node IDs
	dmMutex sync.Mutex       // guards dm, and serializes DB transactions
}

// Import adds the file at path to the gallery: it symlinks the file
// into the image directory under the SHA-1 of its contents, and records
// both the image and a node for path in the database.
// Files that isImage doesn't recognize are skipped without an error.
//
// The filesystem work happens before taking dmMutex, the database
// transaction runs while holding it.
func (i *importer) Import(path string) error {
	// The input may be a relative path, and we want to remember it as such,
	// but symlinks for the images must be absolute.
	absPath, err := filepath.Abs(path)
	if err != nil {
		return err
	}

	// Skip videos, which ImageMagick can process, but we don't want it to,
	// so that they're not converted 1:1 to WebP.
	pathIsImage, err := isImage(path)
	if err != nil {
		return err
	}
	if !pathIsImage {
		return nil
	}

	width, height, err := pingImage(path)
	if err != nil {
		return err
	}

	f, err := os.Open(path)
	if err != nil {
		return err
	}
	defer f.Close()

	// The modification time ends up in the node record below.
	s, err := f.Stat()
	if err != nil {
		return err
	}

	// Hash the file's contents, going through a 64 KiB buffer.
	hash := sha1.New()
	_, err = io.CopyBuffer(hash, f, make([]byte, 65536))
	if err != nil {
		return err
	}

	hexSHA1 := hex.EncodeToString(hash.Sum(nil))
	pathImage := imagePath(hexSHA1)
	imageDirname, _ := filepath.Split(pathImage)
	if err := os.MkdirAll(imageDirname, 0755); err != nil {
		return err
	}
	// Anything that already exists at the symlink's path is left alone.
	if err := os.Symlink(absPath, pathImage); err != nil &&
		!errors.Is(err, fs.ErrExist) {
		return err
	}

	// The directoryManager isn't thread-safe.
	// This lock also simulates a timeout-less BEGIN EXCLUSIVE.
	i.dmMutex.Lock()
	defer i.dmMutex.Unlock()

	tx, err := db.Begin()
	if err != nil {
		return err
	}
	// This has no effect on transactions that have been committed.
	defer tx.Rollback()

	if _, err = tx.Exec(`INSERT INTO image(sha1, width, height) VALUES (?, ?, ?)
ON CONFLICT(sha1) DO NOTHING`, hexSHA1, width, height); err != nil {
		return err
	}

	// XXX: The directoryManager's cache is questionable here,
	// if only because it keeps entries even when transactions fail.
	dbDirname, dbBasename := filepath.Split(path)
	dbParent, err := i.dm.IDForDirectoryPath(tx, dbDirname)
	if err != nil {
		return err
	}

	// FIXME: This disallows any entries directly in the root.
	_, err = tx.Exec(`INSERT INTO node(parent, name, mtime, sha1)
VALUES (?, ?, ?, ?) ON CONFLICT DO
UPDATE SET mtime = excluded.mtime, sha1 = excluded.sha1`,
		dbParent, dbBasename, s.ModTime().Unix(), hexSHA1)
	if err != nil {
		return err
	}
	return tx.Commit()
}
// cmdImport adds files to the "node" table: all non-directory files found
// below the given roots are passed to importer.Import, in parallel.
// TODO: Consider making this copy rather than symlink images.
func cmdImport(args []string) error {
	if len(args) < 1 {
		return errors.New("usage: GD ROOT...")
	}
	if err := openDB(args[0]); err != nil {
		return err
	}

	// Make the first step collecting all the paths,
	// in order to show more useful progress information.
	paths := []string{}
	collect := func(path string, d fs.DirEntry, err error) error {
		if err != nil {
			return err
		}
		if !d.IsDir() {
			paths = append(paths, path)
		}
		return nil
	}
	for _, root := range args[1:] {
		if err := filepath.WalkDir(root, collect); err != nil {
			return err
		}
	}

	var imp importer
	return parallelize(paths, func(path string) (string, error) {
		return "", imp.Import(path)
	})
}
// - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
// syncFileInfo describes a file being synchronized: it carries the task's
// inputs to syncProcess, which fills in the err, sha1, width, and height
// fields, and it is then consumed by syncPostProcess.
type syncFileInfo struct {
	dbID     int64  // DB node ID, or zero if there was none
	dbParent int64  // where the file was to be stored
	dbName   string // the name under which it was to be stored
	fsPath   string // symlink target
	fsMtime  int64  // last modified Unix timestamp, used a bit like an ID

	err    error  // any processing error
	sha1   string // raw content hash, empty to skip file
	width  int    // image width in pixels
	height int    // image height in pixels
}
// syncContext bundles the state shared by the sync* functions.
type syncContext struct {
	ctx  context.Context   // checked by syncEnqueue and syncDequeue
	tx   *sql.Tx           // the transaction in which all DB changes are made
	info chan syncFileInfo // results of tasks started by syncEnqueue
	pb   *progressBar      // stepped by syncPostProcess

	// The statements used by syncDispose; they are prepared elsewhere.
	stmtOrphan     *sql.Stmt
	stmtDisposeSub *sql.Stmt
	stmtDisposeAll *sql.Stmt
}
// syncPrintf logs a formatted message on a line of its own,
// redrawing the progress bar afterwards.
func syncPrintf(c *syncContext, format string, v ...any) {
	c.pb.Stop()
	defer c.pb.Update()

	log.Printf(format+"\n", v...)
}
// - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
// syncNode is a row of the node table, as loaded by syncGetNodes.
type syncNode struct {
	dbID    int64
	dbName  string
	dbMtime int64  // zero when NULL
	dbSHA1  string // empty when NULL
}

// dbIsDir reports whether the node is a directory,
// meaning that it has no content hash.
func (n *syncNode) dbIsDir() bool {
	return len(n.dbSHA1) == 0
}
// syncFile is a directory entry, as loaded by syncGetFiles.
type syncFile struct {
	fsName  string
	fsMtime int64 // last modified Unix timestamp
	fsIsDir bool
}

// syncPair couples what the database and the filesystem hold under
// one name; syncDirectoryPair accepts either side being nil.
type syncPair struct {
	db *syncNode
	fs *syncFile
}
// syncGetNodes returns direct children of a DB node, ordered by name.
// SQLite, like Go, compares strings byte-wise by default.
// NULL mtime and sha1 values are returned as zero and the empty string.
func syncGetNodes(tx *sql.Tx, dbParent int64) (nodes []syncNode, err error) {
	// This works even for the root, which doesn't exist as a DB node.
	rows, err := tx.Query(`SELECT id, name, IFNULL(mtime, 0), IFNULL(sha1, '')
FROM node WHERE IFNULL(parent, 0) = ? ORDER BY name`, dbParent)
	if err != nil {
		return nil, err
	}
	defer rows.Close()

	for rows.Next() {
		var child syncNode
		err = rows.Scan(&child.dbID,
			&child.dbName, &child.dbMtime, &child.dbSHA1)
		if err != nil {
			return nodes, err
		}
		nodes = append(nodes, child)
	}
	return nodes, rows.Err()
}
// syncGetFiles returns direct children of a FS directory, ordered by name.
// Names are compared byte-wise.
func syncGetFiles(fsPath string) (files []syncFile, err error) {
	dir, err := os.Open(fsPath)
	if err != nil {
		return nil, err
	}
	defer dir.Close()

	entries, err := dir.ReadDir(0)
	if err != nil {
		return nil, err
	}
	for _, entry := range entries {
		info, infoErr := entry.Info()
		if infoErr != nil {
			return files, infoErr
		}
		files = append(files, syncFile{
			fsName:  entry.Name(),
			fsMtime: info.ModTime().Unix(),
			fsIsDir: entry.IsDir(),
		})
	}

	byName := func(i, j int) bool { return files[i].fsName < files[j].fsName }
	sort.Slice(files, byName)
	return files, nil
}
// - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
// syncProcess examines the file at info.fsPath, and fills in the width,
// height, and sha1 fields of info. The sha1 field stays empty for files
// that isImage doesn't recognize, which is not considered an error.
func syncProcess(c *syncContext, info *syncFileInfo) error {
	// Skip videos, which ImageMagick can process, but we don't want it to,
	// so that they're not converted 1:1 to WebP.
	switch recognized, err := isImage(info.fsPath); {
	case err != nil:
		return err
	case !recognized:
		return nil
	}

	var err error
	if info.width, info.height, err = pingImage(info.fsPath); err != nil {
		return err
	}

	f, err := os.Open(info.fsPath)
	if err != nil {
		return err
	}
	defer f.Close()

	// We could make this at least somewhat interruptible by c.ctx,
	// though it would still work poorly.
	digest := sha1.New()
	if _, err := io.CopyBuffer(digest, f, make([]byte, 65536)); err != nil {
		return err
	}
	info.sha1 = hex.EncodeToString(digest.Sum(nil))
	return nil
}
// syncEnqueue runs file scanning, which can be CPU and I/O expensive,
// in parallel. The goroutine only touches the filesystem, read-only.
//
// It waits for a free slot of taskSemaphore first, failing if c.ctx
// finishes sooner. The task's result is sent to c.info.
func syncEnqueue(c *syncContext, info syncFileInfo) error {
	if err := taskSemaphore.acquire(c.ctx); err != nil {
		return err
	}

	// The info parameter is this call's own copy, which nothing else
	// touches from here on, so the goroutine may freely modify it.
	go func() {
		defer taskSemaphore.release()
		info.err = syncProcess(c, &info)
		c.info <- info
	}()
	return nil
}
// syncDequeue flushes the result queue of finished asynchronous tasks.
// It never waits for results: it returns as soon as none are ready.
// It fails when c.ctx is found to be done, or when post-processing fails.
func syncDequeue(c *syncContext) error {
	for {
		select {
		case <-c.ctx.Done():
			return c.ctx.Err()
		case finished := <-c.info:
			if err := syncPostProcess(c, finished); err != nil {
				return err
			}
			continue
		default:
		}
		return nil
	}
}
// syncDispose creates orphan records for the entire subtree given by nodeID
// as appropriate, then deletes all nodes within the subtree. The subtree root
// node is not deleted if "keepNode" is true.
//
// Orphans keep their thumbnail files, as evidence.
//
// The work is done by statements of the context that are prepared elsewhere:
// stmtOrphan first, then either stmtDisposeSub or stmtDisposeAll.
func syncDispose(c *syncContext, nodeID int64, keepNode bool) error {
	if _, err := c.stmtOrphan.Exec(nodeID); err != nil {
		return err
	}

	dispose := c.stmtDisposeAll
	if keepNode {
		dispose = c.stmtDisposeSub
	}
	_, err := dispose.Exec(nodeID)
	return err
}
// syncImage makes sure that there is an image record for info.sha1,
// and that the image's path within the gallery leads to a file,
// symlinking it to info.fsPath if need be.
func syncImage(c *syncContext, info syncFileInfo) error {
	_, err := c.tx.Exec(`INSERT INTO image(sha1, width, height)
VALUES (?, ?, ?) ON CONFLICT(sha1) DO NOTHING`,
		info.sha1, info.width, info.height)
	if err != nil {
		return err
	}

	// Fast path: it may already be there, and not be a dead symlink.
	link := imagePath(info.sha1)
	if _, statErr := os.Stat(link); statErr == nil {
		return nil
	}

	directory, _ := filepath.Split(link)
	if err := os.MkdirAll(directory, 0755); err != nil {
		return err
	}

	for {
		// Both success and unexpected failures conclude the loop.
		linkErr := os.Symlink(info.fsPath, link)
		if linkErr == nil || !errors.Is(linkErr, fs.ErrExist) {
			return linkErr
		}

		// Try to remove anything standing in the way, and try again.
		if removeErr := os.Remove(link); removeErr != nil {
			return removeErr
		}
	}
}
// syncPostProcess applies the result of one syncProcess run
// to the database, and steps the progress bar once it's done.
//
// The comments on individual cases name state transitions, going from
// what the database holds to what the filesystem does, with "0" standing
// for nothing, "D" for a directory, and "F" for a file.
func syncPostProcess(c *syncContext, info syncFileInfo) error {
	defer c.pb.Step()

	// TODO: When replacing an image node (whether it has or doesn't have
	// other links to keep it alive), we could offer copying all tags,
	// though this needs another table to track it.
	// (If it's equivalent enough, the dhash will stay the same,
	// so user can resolve this through the duplicates feature.)
	switch {
	case info.err != nil:
		// * → error
		// Failures of external programs are merely reported,
		// and the file is handled as if it were skipped.
		// Any other errors are returned.
		if ee, ok := info.err.(*exec.ExitError); ok {
			syncPrintf(c, "%s: %s", info.fsPath, ee.Stderr)
		} else {
			return info.err
		}
		fallthrough

	case info.sha1 == "":
		// 0 → 0
		if info.dbID == 0 {
			return nil
		}

		// D → 0, F → 0
		return syncDispose(c, info.dbID, false /*keepNode*/)

	case info.dbID == 0:
		// 0 → F
		if err := syncImage(c, info); err != nil {
			return err
		}
		if _, err := c.tx.Exec(`INSERT INTO node(parent, name, mtime, sha1)
VALUES (?, ?, ?, ?)`,
			info.dbParent, info.dbName, info.fsMtime, info.sha1); err != nil {
			return err
		}
		return nil

	default:
		// D → F, F → F (this statement is a no-op with the latter)
		if err := syncDispose(c, info.dbID, true /*keepNode*/); err != nil {
			return err
		}

		// Even if the hash didn't change, we may fix any broken symlinks.
		if err := syncImage(c, info); err != nil {
			return err
		}
		if _, err := c.tx.Exec(`UPDATE node SET mtime = ?, sha1 = ?
WHERE id = ?`, info.fsMtime, info.sha1, info.dbID); err != nil {
			return err
		}
		return nil
	}
}
// syncDirectoryPair reconciles one directory entry, given as a pairing
// of its database node and its filesystem counterpart, either of which
// may be missing. In the transition comments, 0 stands for nothing,
// D for a directory, and F for a file.
//
// dbParent is the ID of the containing node, fsPath the containing directory.
func syncDirectoryPair(c *syncContext, dbParent int64, fsPath string,
	pair syncPair) error {
	node, file := pair.db, pair.fs

	info := syncFileInfo{dbParent: dbParent}
	if node != nil {
		info.dbID = node.dbID
	}

	if file == nil {
		if node == nil {
			// 0 → 0, unreachable.
			return nil
		}
		// D → 0, F → 0
		return syncDispose(c, node.dbID, false /*keepNode*/)
	}

	childPath := filepath.Join(fsPath, file.fsName)
	info.dbName = file.fsName
	info.fsPath = childPath
	info.fsMtime = file.fsMtime

	if node == nil {
		if !file.fsIsDir {
			// 0 → F (or 0 → 0)
			return syncEnqueue(c, info)
		}

		// 0 → D
		result, err := c.tx.Exec(`INSERT INTO node(parent, name)
VALUES (?, ?)`, dbParent, file.fsName)
		if err != nil {
			return err
		}
		id, err := result.LastInsertId()
		if err != nil {
			return err
		}
		return syncDirectory(c, id, childPath)
	}

	switch nodeIsDir := node.dbIsDir(); {
	case nodeIsDir && file.fsIsDir:
		// D → D
		return syncDirectory(c, node.dbID, childPath)
	case nodeIsDir:
		// D → F (or D → 0)
		return syncEnqueue(c, info)
	case file.fsIsDir:
		// F → D
		if err := syncDispose(c, node.dbID, true /*keepNode*/); err != nil {
			return err
		}
		if _, err := c.tx.Exec(`UPDATE node
SET mtime = NULL, sha1 = NULL WHERE id = ?`, node.dbID); err != nil {
			return err
		}
		return syncDirectory(c, node.dbID, childPath)
	case node.dbMtime != file.fsMtime:
		// F → F (or F → 0)
		// Content modifications are assumed to change the timestamp.
		return syncEnqueue(c, info)
	}

	// F → F with an unchanged timestamp: nothing to do.
	return nil
}
// syncDirectory synchronizes the children of the node dbParent with
// the contents of the directory fsPath, descending into subdirectories.
//
// The merge below, as well as the binary search for the database file,
// rely on both listings being ordered by name.
func syncDirectory(c *syncContext, dbParent int64, fsPath string) error {
	nodes, err := syncGetNodes(c.tx, dbParent)
	if err != nil {
		return err
	}
	files, err := syncGetFiles(fsPath)
	if err != nil {
		return err
	}

	// This would not be fatal, but it has annoying consequences.
	byName := func(a, b syncFile) int {
		return strings.Compare(a.fsName, b.fsName)
	}
	if _, found := slices.BinarySearchFunc(
		files, syncFile{fsName: nameOfDB}, byName); found {
		syncPrintf(c, "%s may be a gallery directory, treating as empty",
			fsPath)
		files = nil
	}

	// Merge the two listings into pairs of matching names,
	// leaving one side empty where an entry has no counterpart.
	pairs := make([]syncPair, 0, len(nodes)+len(files))
	n, f := 0, 0
	for n < len(nodes) && f < len(files) {
		switch node, file := &nodes[n], &files[f]; {
		case node.dbName < file.fsName:
			pairs = append(pairs, syncPair{db: node})
			n++
		case node.dbName > file.fsName:
			pairs = append(pairs, syncPair{fs: file})
			f++
		default:
			pairs = append(pairs, syncPair{db: node, fs: file})
			n++
			f++
		}
	}
	for ; n < len(nodes); n++ {
		pairs = append(pairs, syncPair{db: &nodes[n]})
	}
	for ; f < len(files); f++ {
		pairs = append(pairs, syncPair{fs: &files[f]})
	}

	// Pending results are dequeued before each pair gets processed.
	for i := range pairs {
		if err := syncDequeue(c); err != nil {
			return err
		}
		if err := syncDirectoryPair(c, dbParent, fsPath, pairs[i]); err != nil {
			return err
		}
	}
	return nil
}
// syncRoot synchronizes a single root directory: it resolves the path
// to a database node, walks the directory tree, waits for all outstanding
// tasks, and finally prunes directory nodes that ended up empty.
func syncRoot(c *syncContext, fsPath string) error {
	// Figure out a database root (not trying to convert F → D on conflict,
	// also because we don't know yet if the argument is a directory).
	//
	// Synchronizing F → D or * → F are special cases not worth implementing.
	dbParent, err := idForPath(
		c.tx, decodeWebPath(filepath.ToSlash(fsPath)), true)
	if err != nil {
		return err
	}
	if err := syncDirectory(c, dbParent, fsPath); err != nil {
		return err
	}

	// Taking every slot of the semaphore means no task is running anymore,
	// at which point all of their results can be processed.
	slots := cap(taskSemaphore)
	for taken := 0; taken < slots; taken++ {
		if err := taskSemaphore.acquire(c.ctx); err != nil {
			return err
		}
	}
	if err := syncDequeue(c); err != nil {
		return err
	}

	// This is not our semaphore, so prepare it for the next user.
	for taken := slots; taken > 0; taken-- {
		taskSemaphore.release()
	}

	// Delete empty directories, from the bottom of the tree up to,
	// but not including, the inserted root.
	//
	// We need to do this at the end due to our recursive handling,
	// as well as because of asynchronous file filtering.
	prune, err := c.tx.Prepare(`
WITH RECURSIVE subtree(id, parent, sha1, level) AS (
SELECT id, parent, sha1, 1 FROM node WHERE id = ?
UNION ALL
SELECT n.id, n.parent, n.sha1, s.level + 1
FROM node AS n JOIN subtree AS s ON n.parent = s.id
) DELETE FROM node WHERE id IN (
SELECT id FROM subtree WHERE level <> 1 AND sha1 IS NULL
-- No idea why one can't put the "node" table in the subselect.
-- The whole query then matches nothing.
AND id NOT IN (SELECT parent FROM subtree)
)`)
	if err != nil {
		return err
	}

	// Each pass only removes childless directories,
	// so repeat until a pass does not delete anything.
	for {
		result, err := prune.Exec(dbParent)
		if err != nil {
			return err
		}
		deleted, err := result.RowsAffected()
		if err != nil {
			return err
		}
		if deleted == 0 {
			return nil
		}
	}
}
// disposeCTE is the common WITH RECURSIVE prefix of the statements
// that syncRun prepares for disposing of nodes. It takes a single
// parameter, a node ID, and defines the following tables:
//
//   - root: the given node, repeatedly joined with its parents,
//     prepending their names to assemble a slash-separated path,
//   - children: the given node, taken from the root row that has reached
//     a NULL parent, together with all nodes below it,
//     along with their paths and their depth (the given node is level 1),
//   - removed: children grouped by SHA1, with the number of rows
//     and the smallest path in each group,
//   - orphaned: those removed SHA1s that have exactly as many rows
//     in the entire node table as they have within children.
const disposeCTE = `WITH RECURSIVE
root(id, sha1, parent, path) AS (
SELECT id, sha1, parent, name FROM node WHERE id = ?
UNION ALL
SELECT r.id, r.sha1, n.parent, n.name || '/' || r.path
FROM node AS n JOIN root AS r ON n.id = r.parent
),
children(id, sha1, path, level) AS (
SELECT id, sha1, path, 1 FROM root WHERE parent IS NULL
UNION ALL
SELECT n.id, n.sha1, c.path || '/' || n.name, c.level + 1
FROM node AS n JOIN children AS c ON n.parent = c.id
),
removed(sha1, count, path) AS (
SELECT sha1, COUNT(*) AS count, MIN(path) AS path
FROM children
GROUP BY sha1
),
orphaned(sha1, path, count, total) AS (
SELECT r.sha1, r.path, r.count, COUNT(*) AS total
FROM removed AS r
JOIN node ON node.sha1 = r.sha1
GROUP BY node.sha1
HAVING count = total
)`
// syncRun synchronizes all given roots, in order, within the transaction tx.
// It sets up the shared state: the progress bar, the prepared disposal
// statements, and the channel that receives results of file info tasks.
func syncRun(ctx context.Context, tx *sql.Tx, roots []string) error {
	c := syncContext{ctx: ctx, tx: tx, pb: newProgressBar(-1)}
	defer c.pb.Stop()

	// All disposal statements share the same recursive prefix.
	prepare := func(action string) (*sql.Stmt, error) {
		return tx.Prepare(disposeCTE + action)
	}

	var err error
	if c.stmtOrphan, err = prepare(`
INSERT OR IGNORE INTO orphan(sha1, path)
SELECT sha1, path FROM orphaned`); err != nil {
		return err
	}
	if c.stmtDisposeSub, err = prepare(`
DELETE FROM node WHERE id
IN (SELECT DISTINCT id FROM children WHERE level <> 1)`); err != nil {
		return err
	}
	if c.stmtDisposeAll, err = prepare(`
DELETE FROM node WHERE id
IN (SELECT DISTINCT id FROM children)`); err != nil {
		return err
	}

	// Info tasks take a position in the task semaphore channel,
	// then fill the info channel.
	//
	// Immediately after syncDequeue(), the info channel is empty,
	// but the semaphore might be full.
	//
	// By having at least one position in the info channel,
	// we allow at least one info task to run to semaphore release,
	// so that syncEnqueue() doesn't deadlock.
	//
	// By making it the same size as the semaphore,
	// the end of this function doesn't need to dequeue while waiting.
	// It also prevents goroutine leaks despite leaving them running--
	// once they finish their job, they're gone,
	// and eventually the info channel would get garbage collected.
	//
	// The additional slot is there to handle the one result
	// that may be placed while syncEnqueue() waits for the semaphore,
	// i.e., it is for the result of the task that syncEnqueue() spawns.
	c.info = make(chan syncFileInfo, cap(taskSemaphore)+1)

	for i := range roots {
		if err := syncRoot(&c, roots[i]); err != nil {
			return err
		}
	}
	return nil
}
// syncRootCovers reports whether path is the same as ancestor,
// or lies somewhere beneath it. Both must be cleaned absolute paths.
func syncRootCovers(ancestor, path string) bool {
	if ancestor == path {
		return true
	}
	// Filesystem roots already end with a separator.
	prefix := ancestor
	if !strings.HasSuffix(prefix, string(filepath.Separator)) {
		prefix += string(filepath.Separator)
	}
	return strings.HasPrefix(path, prefix)
}

// cmdSync ensures the given (sub)roots are accurately reflected
// in the database.
//
// args[0] is the gallery directory, the remaining arguments are the roots.
// All work happens within one transaction, which is only committed
// when every root has been synchronized successfully.
func cmdSync(args []string) error {
	if len(args) < 2 {
		return errors.New("usage: GD ROOT...")
	}
	if err := openDB(args[0]); err != nil {
		return err
	}

	// TODO: See if the SQLite can cancel anything in a useful manner.
	// If using this, beware that a cancel prevents committing transactions.
	ctx := context.Background()

	// In case of a failure during processing, the only retained side effects
	// on the filesystem tree are:
	//  - Fixing dead symlinks to images.
	//  - Creating symlinks to images that aren't necessary.
	tx, err := db.BeginTx(ctx, nil)
	if err != nil {
		return err
	}
	defer tx.Rollback()

	// Mild hack: upgrade the transaction to a write one straight away,
	// in order to rule out deadlocks (preventable failure).
	if _, err := tx.Exec(`END TRANSACTION;
BEGIN IMMEDIATE TRANSACTION`); err != nil {
		return err
	}

	// XXX: By not using the context for the transaction,
	// interrupts can get ignored around the Commit.
	ctxSignal, stop := signal.NotifyContext(ctx, os.Interrupt)
	defer stop()

	// Normalize arguments.
	// At least for now, turn all roots into absolute paths.
	roots := args[1:]
	for i := range roots {
		roots[i], err = filepath.Abs(filepath.Clean(roots[i]))
		if err != nil {
			return err
		}
	}

	// Filter out duplicates, as well as roots contained in other roots.
	//
	// An ancestor always sorts before its descendants, yet unrelated paths
	// may sort in between them ("a" < "a-b" < "a/b"), which is why each
	// root is checked against all the roots kept so far.
	sort.Strings(roots)
	unique := roots[:0]
	for _, root := range roots {
		if slices.ContainsFunc(unique, func(kept string) bool {
			return syncRootCovers(kept, root)
		}) {
			log.Printf("asking to sync path twice: %s\n", root)
			continue
		}
		unique = append(unique, root)
	}

	if err := syncRun(ctxSignal, tx, unique); err != nil {
		return err
	}
	return tx.Commit()
}
// --- Tagging -----------------------------------------------------------------
// cmdTag mass imports tags from data passed on stdin as a TSV
// of SHA1 TAG WEIGHT entries.
//
// args[0] is the gallery directory, args[1] names the tag space,
// and the optional args[2] is a description, which is only stored
// when the tag space gets created by this invocation.
//
// Everything happens in a single transaction: any malformed line
// or database failure aborts the import without leaving partial results.
func cmdTag(args []string) error {
	if len(args) < 2 || len(args) > 3 {
		return errors.New("usage: GD SPACE [DESCRIPTION]")
	}
	if err := openDB(args[0]); err != nil {
		return err
	}

	space := args[1]
	var description sql.NullString
	if len(args) >= 3 {
		description = sql.NullString{String: args[2], Valid: true}
	}

	// Note that starting as a write transaction prevents deadlocks.
	// Imports are rare, and just bulk load data, so this scope is fine.
	tx, err := db.Begin()
	if err != nil {
		return err
	}
	defer tx.Rollback()

	if _, err := tx.Exec(`INSERT OR IGNORE INTO tag_space(name, description)
VALUES (?, ?)`, space, description); err != nil {
		return err
	}

	var spaceID int64
	if err := tx.QueryRow(`SELECT id FROM tag_space WHERE name = ?`,
		space).Scan(&spaceID); err != nil {
		return err
	}

	// XXX: It might make sense to pre-erase all tag assignments within
	// the given space for that image, the first time we see it:
	//
	//   DELETE FROM tag_assignment
	//   WHERE sha1 = ? AND tag IN (SELECT id FROM tag WHERE space = ?)
	//
	// or even just clear the tag space completely:
	//
	//   DELETE FROM tag_assignment
	//   WHERE tag IN (SELECT id FROM tag WHERE space = ?);
	//   DELETE FROM tag WHERE space = ?;
	stmt, err := tx.Prepare(`INSERT INTO tag_assignment(sha1, tag, weight)
VALUES (?, (SELECT id FROM tag WHERE space = ? AND name = ?), ?)
ON CONFLICT DO UPDATE SET weight = ?`)
	if err != nil {
		return err
	}

	scanner := bufio.NewScanner(os.Stdin)
	for scanner.Scan() {
		fields := strings.Split(scanner.Text(), "\t")
		if len(fields) != 3 {
			return errors.New("invalid input format")
		}

		hash, tag := fields[0], fields[1]
		weight, err := strconv.ParseFloat(fields[2], 64)
		if err != nil {
			return err
		}

		// Make sure the tag exists before assigning it.
		// A failure here must not be mistaken for success,
		// or the import would get silently rolled back.
		if _, err := tx.Exec(
			`INSERT OR IGNORE INTO tag(space, name) VALUES (?, ?);`,
			spaceID, tag); err != nil {
			return err
		}
		if _, err := stmt.Exec(hash, spaceID, tag, weight, weight); err != nil {
			return fmt.Errorf("%s: %w", hash, err)
		}
	}
	if err := scanner.Err(); err != nil {
		return err
	}
	return tx.Commit()
}
// --- Check -------------------------------------------------------------------
// isValidSHA1 reports whether hash is a SHA-1 digest written out
// as lowercase hexadecimal digits, which takes two characters per byte.
func isValidSHA1(hash string) bool {
	if len(hash) != 2*sha1.Size {
		return false
	}
	// The decoder accepts uppercase digits as well, so rule them out.
	if hash != strings.ToLower(hash) {
		return false
	}
	_, err := hex.DecodeString(hash)
	return err == nil
}
// hashesToFileListing returns the sorted, duplicate-free list of paths
// expected to exist under root for the given hashes: root itself,
// a subdirectory named after the first two characters of each hash,
// and within it a file named by the hash followed by suffix.
func hashesToFileListing(root, suffix string, hashes []string) []string {
	// Note that we're semi-duplicating {image,thumb}Path().
	paths := make([]string, 1, 1+2*len(hashes))
	paths[0] = root
	for i := range hashes {
		bucket := filepath.Join(root, hashes[i][:2])
		paths = append(paths, bucket)
		paths = append(paths, filepath.Join(bucket, hashes[i]+suffix))
	}

	// Compacting only removes adjacent duplicates, so sort first.
	slices.Sort(paths)
	return slices.Compact(paths)
}
// collectFileListing walks the file tree rooted at root, and returns
// the sorted paths of everything visited, root included.
//
// The walk stops at the first error, which is then returned along with
// the paths gathered so far, the failing one being the last to be added.
func collectFileListing(root string) (paths []string, err error) {
	visit := func(path string, _ fs.DirEntry, walkErr error) error {
		paths = append(paths, path)
		return walkErr
	}
	err = filepath.WalkDir(root, visit)

	// Even though it should already be sorted somehow.
	slices.Sort(paths)
	return paths, err
}
// checkFiles compares the paths that should exist under root, according
// to the given hashes and file suffix, against those that actually do.
// Every path that is only present on one side is printed out.
//
// It returns whether both listings match exactly, and the paths they share.
func checkFiles(root, suffix string, hashes []string) (bool, []string, error) {
	expected := hashesToFileListing(root, suffix, hashes)
	actual, err := collectFileListing(root)
	if err != nil {
		return false, nil, err
	}

	// Both listings are sorted, so they can be merged in a single pass.
	clean, shared := true, []string{}
	e, a := 0, 0
	for e < len(expected) && a < len(actual) {
		switch want, have := expected[e], actual[a]; {
		case want < have:
			clean = false
			fmt.Printf("only in DB: %s\n", want)
			e++
		case want > have:
			clean = false
			fmt.Printf("only in FS: %s\n", have)
			a++
		default:
			shared = append(shared, want)
			e++
			a++
		}
	}

	// At most one of the listings has anything left at this point.
	for ; e < len(expected); e++ {
		clean = false
		fmt.Printf("only in DB: %s\n", expected[e])
	}
	for ; a < len(actual); a++ {
		clean = false
		fmt.Printf("only in FS: %s\n", actual[a])
	}
	return clean, shared, nil
}
// cmdCheck carries out various database consistency checks:
// the format of image hashes, the agreement of the image and thumbnail
// directories with the database, and the liveness of image symlinks.
//
// args[0] is the gallery directory. Findings are printed out as they are
// discovered, and an error is returned if there have been any.
func cmdCheck(args []string) error {
	if len(args) != 1 {
		return errors.New("usage: GD")
	}
	if err := openDB(args[0]); err != nil {
		return err
	}

	// Check if hashes are in the right format.
	log.Println("checking image hashes")
	allSHA1, err := dbCollectStrings(`SELECT sha1 FROM image`)
	if err != nil {
		return err
	}

	consistent := true
	for i := range allSHA1 {
		if isValidSHA1(allSHA1[i]) {
			continue
		}
		consistent = false
		fmt.Printf("invalid image SHA1: %s\n", allSHA1[i])
	}

	// This is, rather obviously, just a strict subset.
	// Although it doesn't run in the same transaction.
	thumbSHA1, err := dbCollectStrings(`SELECT sha1 FROM image
WHERE thumbw IS NOT NULL OR thumbh IS NOT NULL`)
	if err != nil {
		return err
	}

	// This somewhat duplicates {image,thumb}Path().
	log.Println("checking SQL against filesystem")
	imageRoot := filepath.Join(galleryDirectory, nameOfImageRoot)
	okImages, present, err := checkFiles(imageRoot, "", allSHA1)
	if err != nil {
		return err
	}

	thumbRoot := filepath.Join(galleryDirectory, nameOfThumbRoot)
	okThumbs, _, err := checkFiles(thumbRoot, ".webp", thumbSHA1)
	if err != nil {
		return err
	}
	consistent = consistent && okImages && okThumbs

	// NOTE: We could also compare mtime, and on mismatch the current SHA1,
	// though that's more of a "sync" job.
	log.Println("checking for dead symlinks")
	for _, path := range present {
		// os.Stat follows symlinks, and thus fails on dead ones.
		_, statErr := os.Stat(path)
		if statErr == nil {
			continue
		}
		consistent = false
		fmt.Printf("%s: %s\n", path, statErr)
	}

	if consistent {
		return nil
	}
	return errors.New("detected inconsistencies")
}
// --- Thumbnailing ------------------------------------------------------------
// makeThumbnail runs ImageMagick's convert to write a thumbnail
// of pathImage to pathThumb, creating the target directory as needed.
// It returns the width and height that convert has reported.
//
// When convert exits unsuccessfully, the returned error
// is an *exec.ExitError with the captured standard error output.
func makeThumbnail(pathImage, pathThumb string) (int, int, error) {
	targetDir, _ := filepath.Split(pathThumb)
	if err := os.MkdirAll(targetDir, 0755); err != nil {
		return 0, 0, err
	}

	// Create a normalized thumbnail. Since we don't particularly need
	// any complex processing, such as preserving metadata,
	// simply push it through ImageMagick.
	//
	//  - http://www.ericbrasseur.org/gamma.html
	//  - https://www.imagemagick.org/Usage/thumbnails/
	//  - https://imagemagick.org/script/command-line-options.php#layers
	//
	// "info:" output is written for each frame, which is why we delete
	// all of them but the first one beforehand.
	//
	// TODO: See if we can optimize resulting WebP animations.
	// (Do -layers optimize* apply to this format at all?)
	arguments := []string{
		"-limit", "thread", "1", pathImage,
		"-coalesce", "-colorspace", "RGB", "-auto-orient", "-strip",
		"-resize", "256x128>", "-colorspace", "sRGB",
		"-format", "%w %h", "+write", pathThumb, "-delete", "1--1", "info:",
	}
	output, err := exec.Command("convert", arguments...).Output()
	if err != nil {
		return 0, 0, err
	}

	// The requested output format is "WIDTH HEIGHT".
	var width, height int
	_, err = fmt.Fscanf(bytes.NewReader(output), "%d %d", &width, &height)
	return width, height, err
}
func makeThumbnailFor(sha1 string) (message string, err error) {
pathImage := imagePath(sha1)
pathThumb := thumbPath(sha1)
w, h, err := makeThumbnail(pathImage, pathThumb)
if err != nil {
if ee, ok := err.(*exec.ExitError); ok {
return string(ee.Stderr), nil
}
return "", err
}
_, err = db.Exec(`UPDATE image SET thumbw = ?, thumbh = ?
WHERE sha1 = ?`, w, h, sha1)
return "", err
}
// cmdThumbnail generates missing thumbnails, in parallel.
//
// args[0] is the gallery directory. Any further arguments are hashes
// of the images to process; without them, all images in the database
// that lack thumbnail dimensions get processed.
func cmdThumbnail(args []string) error {
	if len(args) == 0 {
		return errors.New("usage: GD [SHA1...]")
	}
	if err := openDB(args[0]); err != nil {
		return err
	}

	if requested := args[1:]; len(requested) > 0 {
		return parallelize(requested, makeThumbnailFor)
	}

	// Get all unique images in the database with no thumbnail.
	missing, err := dbCollectStrings(`SELECT sha1 FROM image
WHERE thumbw IS NULL OR thumbh IS NULL`)
	if err != nil {
		return err
	}
	return parallelize(missing, makeThumbnailFor)
}
// --- Perceptual hash ---------------------------------------------------------
// linearImage wraps an image, presenting its colour channels
// converted from the sRGB transfer curve to linear intensities.
type linearImage struct {
	img image.Image
}

// newLinearImage returns a linearizing view of img.
func newLinearImage(img image.Image) *linearImage {
	view := new(linearImage)
	view.img = img
	return view
}

// ColorModel returns the colour model of the wrapped image.
func (l *linearImage) ColorModel() color.Model {
	return l.img.ColorModel()
}

// Bounds returns the bounds of the wrapped image.
func (l *linearImage) Bounds() image.Rectangle {
	return l.img.Bounds()
}

// unSRGB converts a 16-bit sRGB-encoded channel value
// to an 8-bit linear one, truncating any fractional part.
func unSRGB(c uint32) uint8 {
	const (
		linearLimit = 0.04045       // Upper end of the linear segment.
		linearScale = 255.0 / 12.92 // Slope of it, scaled to 8 bits.
	)

	v := float64(c) / 0xffff
	if v > linearLimit {
		return uint8(math.Pow((v+0.055)/(1.055), 2.4) * 255.0)
	}
	return uint8(v * linearScale)
}

// At returns the pixel at the given coordinates with linearized
// colour channels. The alpha channel is merely reduced to 8 bits.
func (l *linearImage) At(x, y int) color.Color {
	r, g, b, a := l.img.At(x, y).RGBA()
	return color.RGBA{
		R: unSRGB(r),
		G: unSRGB(g),
		B: unSRGB(b),
		A: uint8(a >> 8),
	}
}
// - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
// isWebPAnimation returns whether the given ReadSeeker starts a WebP animation.
// See https://developers.google.com/speed/webp/docs/riff_container
//
// The reader is rewound to its beginning afterwards. Input that is too
// short to contain the inspected header is simply not an animation,
// whereas empty input and I/O failures are reported as errors.
func isWebPAnimation(rs io.ReadSeeker) (bool, error) {
	// The extended file format begins with:
	//  - "RIFF", the file size, "WEBP" (bytes 0 to 11),
	//  - "VP8X", the chunk size (bytes 12 to 19),
	//  - the feature flags (byte 20), 0x02 of which marks animations.
	header := make([]byte, 21)

	// A plain Read is allowed to return fewer bytes than are available.
	if _, err := io.ReadFull(rs, header); err != nil &&
		!errors.Is(err, io.ErrUnexpectedEOF) {
		return false, err
	}
	if _, err := rs.Seek(0, io.SeekStart); err != nil {
		return false, err
	}

	return bytes.Equal(header[:4], []byte("RIFF")) &&
		bytes.Equal(header[8:16], []byte("WEBPVP8X")) &&
		header[20]&0b00000010 != 0, nil
}
// errIsAnimation is returned by dhashWebP for animated images.
var errIsAnimation = errors.New("cannot perceptually hash animations")

// dhashWebP computes a 64-bit difference hash of a still WebP image.
//
// The linearized image is scaled down to 9x8 pixels, and each bit
// of the hash tells whether a pixel is darker than its right-hand
// neighbour. Rows go from top to bottom, each of them taking up one byte,
// with the first row and the leftmost comparison being most significant.
func dhashWebP(rs io.ReadSeeker) (uint64, error) {
	animated, err := isWebPAnimation(rs)
	if err != nil {
		return 0, err
	}
	if animated {
		return 0, errIsAnimation
	}

	// Doing this entire thing in Go is SLOW, but convenient.
	source, err := webp.Decode(rs)
	if err != nil {
		return 0, err
	}

	linear := newLinearImage(source)
	resized := image.NewNRGBA64(image.Rect(0, 0, 9, 8))
	draw.CatmullRom.Scale(resized, resized.Bounds(),
		linear, linear.Bounds(), draw.Src, nil)

	var hash uint64
	for y := 0; y < 8; y++ {
		// Weighted sums of the channels serve as brightness.
		var grey [9]float32
		for x := range grey {
			rgba := resized.NRGBA64At(x, y)
			grey[x] = 0.2126*float32(rgba.R) +
				0.7152*float32(rgba.G) +
				0.0722*float32(rgba.B)
		}

		// Nine pixels yield eight comparisons, shifted in from the left.
		for x := 0; x < 8; x++ {
			hash <<= 1
			if grey[x] < grey[x+1] {
				hash |= 1
			}
		}
	}
	return hash, nil
}
// - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
// makeDhash computes the perceptual hash of the thumbnail
// that belongs to the image with the given hash.
func makeDhash(sha1 string) (uint64, error) {
	thumb, err := os.Open(thumbPath(sha1))
	if err != nil {
		return 0, err
	}
	defer thumb.Close()

	return dhashWebP(thumb)
}
// makeDhashFor computes the perceptual hash for the image with the given
// hash, and stores it in the database.
//
// Animations are skipped silently. Other hashing failures are returned
// as the message rather than as an error.
func makeDhashFor(sha1 string) (message string, err error) {
	hash, err := makeDhash(sha1)
	switch {
	case errors.Is(err, errIsAnimation):
		// Ignoring this common condition.
		return "", nil
	case err != nil:
		return err.Error(), nil
	}

	_, err = db.Exec(
		`UPDATE image SET dhash = ? WHERE sha1 = ?`, int64(hash), sha1)
	return "", err
}
// cmdDhash generates perceptual hash from thumbnails.
//
// args[0] is the gallery directory. Any further arguments are hashes
// of the images to process; without them, all images in the database
// that have no perceptual hash yet get processed.
func cmdDhash(args []string) error {
	if len(args) == 0 {
		return errors.New("usage: GD [SHA1...]")
	}
	if err := openDB(args[0]); err != nil {
		return err
	}

	if requested := args[1:]; len(requested) > 0 {
		return parallelize(requested, makeDhashFor)
	}

	unhashed, err := dbCollectStrings(`
SELECT sha1 FROM image WHERE dhash IS NULL`)
	if err != nil {
		return err
	}
	return parallelize(unhashed, makeDhashFor)
}
// --- Main --------------------------------------------------------------------
// commands maps the names of subcommands to their implementations.
// Handlers receive the arguments that follow the subcommand name.
var commands = map[string]struct {
	handler func(args []string) error
}{
	"init":      {handler: cmdInit},
	"web":       {handler: cmdWeb},
	"import":    {handler: cmdImport},
	"tag":       {handler: cmdTag},
	"sync":      {handler: cmdSync},
	"check":     {handler: cmdCheck},
	"thumbnail": {handler: cmdThumbnail},
	"dhash":     {handler: cmdDhash},
}
// main dispatches to the subcommand named by the first argument,
// passing it all the arguments that follow. At least one such argument
// is required. Failures are logged, and terminate the program.
func main() {
	if len(os.Args) < 3 {
		log.Fatalln("Missing arguments")
	}

	name, args := os.Args[1], os.Args[2:]
	cmd, known := commands[name]
	if !known {
		log.Fatalln("Unknown command: " + name)
	}

	// The semaphore admits as many parallel tasks as there are CPUs.
	taskSemaphore = newSemaphore(runtime.NumCPU())
	runErr := cmd.handler(args)

	// Close the database explicitly, if the command has opened it,
	// so that any errors coming from there get printed.
	if db != nil {
		if closeErr := db.Close(); closeErr != nil {
			log.Println(closeErr)
		}
	}
	if runErr != nil {
		log.Fatalln(runErr)
	}
}