package main
import (
"bufio"
"bytes"
"context"
"crypto/sha1"
"database/sql"
"encoding/hex"
"errors"
"fmt"
"html/template"
"io"
"io/fs"
"log"
"net"
"net/http"
"os"
"os/exec"
"path/filepath"
"regexp"
"runtime"
"strconv"
"strings"
"sync"
"time"
_ "github.com/mattn/go-sqlite3"
"golang.org/x/sync/semaphore"
)
// Process-wide state shared by all subcommands.
var (
// db is the SQLite database handle; it is assigned by openDB.
db *sql.DB
// galleryDirectory is the gallery directory last passed to openDB;
// image and thumbnail paths are resolved relative to it.
galleryDirectory string
// taskSemaphore limits parallel computations.
// It is created in main, weighted by the number of CPUs.
taskSemaphore *semaphore.Weighted
)
// openDB opens the "gallery.db" SQLite database found in directory,
// passing the _foreign_keys=1 and _busy_timeout=1000 connection parameters,
// and remembers directory in galleryDirectory.
//
// Both globals are assigned regardless of the outcome;
// the error from sql.Open is returned unchanged.
func openDB(directory string) error {
	dsn := "file:" + filepath.Join(directory,
		"gallery.db?_foreign_keys=1&_busy_timeout=1000")
	handle, err := sql.Open("sqlite3", dsn)
	db, galleryDirectory = handle, directory
	return err
}
// imagePath returns where the image with the given hexadecimal SHA-1 lives
// within the gallery directory: images/<first two characters>/<sha1>.
// The argument must be at least two characters long.
func imagePath(sha1 string) string {
	bucket := sha1[:2]
	return filepath.Join(galleryDirectory, "images", bucket, sha1)
}
// thumbPath returns where the thumbnail for the image with the given
// hexadecimal SHA-1 lives within the gallery directory:
// thumbs/<first two characters>/<sha1>.webp.
// The argument must be at least two characters long.
func thumbPath(sha1 string) string {
	bucket, filename := sha1[:2], sha1+".webp"
	return filepath.Join(galleryDirectory, "thumbs", bucket, filename)
}
// dbCollectStrings runs query against the global database, and returns
// the single string column of each resulting row, in result order.
// Any query, scan, or iteration error discards what was collected so far.
func dbCollectStrings(query string) ([]string, error) {
	rows, err := db.Query(query)
	if err != nil {
		return nil, err
	}
	defer rows.Close()

	var (
		collected []string
		value     string
	)
	for rows.Next() {
		if err = rows.Scan(&value); err != nil {
			return nil, err
		}
		collected = append(collected, value)
	}
	if err = rows.Err(); err != nil {
		return nil, err
	}
	return collected, nil
}
// --- Progress bar ------------------------------------------------------------
// progressBar is a minimal textual progress indicator, which keeps redrawing
// a single "current/target (percent%)" line on standard output.
type progressBar struct {
	mutex   sync.Mutex // serializes Step calls coming from goroutines
	current int        // number of steps finished so far
	target  int        // total number of steps expected
}

// newProgressBar creates a progress bar expecting target steps,
// and immediately draws it in its initial state.
func newProgressBar(target int) *progressBar {
	pb := &progressBar{current: 0, target: target}
	pb.update()
	return pb
}

// Stop finishes the progress bar, moving output to a fresh line.
func (pb *progressBar) Stop() {
	// The minimum thing that works: just print a newline.
	os.Stdout.WriteString("\n")
}

// update redraws the progress line in place, returning the cursor
// to the start of the line first. It does no locking of its own.
func (pb *progressBar) update() {
	// A job with nothing to do is trivially complete. Handling it explicitly
	// also avoids computing 0/0, which used to print a garbage percentage.
	percent := 100
	if pb.target > 0 {
		percent = pb.current * 100 / pb.target
	}

	// The current count is padded to the width of the target,
	// so that the line doesn't change its length as it advances.
	target := strconv.Itoa(pb.target)
	fmt.Printf("\r%*d/%s (%2d%%)", len(target), pb.current, target, percent)
}

// Step records one finished step and redraws the progress bar.
// It may be called from multiple goroutines concurrently.
func (pb *progressBar) Step() {
	pb.mutex.Lock()
	defer pb.mutex.Unlock()

	pb.current++
	pb.update()
}
// --- Initialization ----------------------------------------------------------
// cmdInit initializes a "gallery directory": it opens gallery.db within GD,
// executes initializeSQL against it, and creates the images and thumbs
// subdirectories.
func cmdInit(args []string) error {
	if len(args) != 1 {
		return errors.New("usage: GD")
	}
	if err := openDB(args[0]); err != nil {
		return err
	}
	if _, err := db.Exec(initializeSQL); err != nil {
		return err
	}

	// XXX: There's technically no reason to keep images as symlinks,
	// we might just keep absolute paths in the database as well.
	for _, name := range []string{"images", "thumbs"} {
		subdirectory := filepath.Join(galleryDirectory, name)
		if err := os.MkdirAll(subdirectory, 0755); err != nil {
			return err
		}
	}
	return nil
}
// --- Web ---------------------------------------------------------------------
// hashRE matches URL paths of the form /<anything>/<40 lowercase hexadecimal
// digits>, capturing the digits as its only submatch.
var hashRE = regexp.MustCompile(`^/.*?/([0-9a-f]{40})$`)
// staticHandler serves all requests for paths other than "/".
// It is assigned in cmdRun.
var staticHandler http.Handler
// page is the template for the directory listing, which handleRequest
// executes with a directory value. It reads its Name, Children, and Entries.
var page = template.Must(template.New("/").Parse(`
Gallery
{{ .Name }}
{{ range .Children }}
- {{ . }}
{{ end }}
{{ range .Entries }}
{{ end }}
`))
// entry is one row of the "entry" table joined with its "image" row,
// as loaded by dbCollectDirectory.
//
// XXX: This is preliminary.
type entry struct {
Parent int64 // entry.parent, or 0 when that is NULL
Name string // entry.name
Mtime int64 // entry.mtime
Sha1 string // entry.sha1, which is also the join key into "image"
Thumbw int // image.thumbw, or 0 when that is NULL
Thumbh int // image.thumbh, or 0 when that is NULL
Dhash int64 // image.dhash, or 0 when that is NULL
}
// directory describes one row of the "directory" table, together with
// its immediate contents, as loaded by dbCollectDirectory.
//
// XXX: This is preliminary.
type directory struct {
Id int64 // the ID that was asked for; 0 is queried as a NULL parent
Name string // directory.name; left empty for ID 0
Parent int64 // directory.parent, or 0 when that is NULL
Children []int64 // IDs of directories whose parent is this one
Entries []entry // entries whose parent is this one
}
// dbCollectDirectory loads the directory with the given ID, along with
// the IDs of its child directories and all entries contained directly in it.
//
// ID zero is not looked up in the "directory" table; it is translated
// to a NULL parent when searching for children and entries.
// On failure, the partially filled directory is returned with the error.
func dbCollectDirectory(id int64) (directory, error) {
	result := directory{Id: id}
	key := sql.NullInt64{Int64: id, Valid: id != 0}

	if key.Valid {
		row := db.QueryRow(`SELECT name, IFNULL(parent, 0)
FROM directory WHERE id IS ?`, key)
		if err := row.Scan(&result.Name, &result.Parent); err != nil {
			return result, err
		}
	}

	children, err := db.Query(
		`SELECT id FROM directory WHERE parent IS ?`, key)
	if err != nil {
		return result, err
	}
	defer children.Close()

	for children.Next() {
		var childID int64
		if err = children.Scan(&childID); err != nil {
			return result, err
		}
		result.Children = append(result.Children, childID)
	}
	if err = children.Err(); err != nil {
		return result, err
	}

	entries, err := db.Query(`SELECT IFNULL(entry.parent, 0),
entry.name, entry.mtime, entry.sha1,
IFNULL(image.thumbw, 0), IFNULL(image.thumbh, 0), IFNULL(image.dhash, 0)
FROM entry JOIN image ON entry.sha1 = image.sha1
WHERE entry.parent IS ?`, key)
	if err != nil {
		return result, err
	}
	defer entries.Close()

	for entries.Next() {
		var item entry
		err = entries.Scan(&item.Parent, &item.Name, &item.Mtime, &item.Sha1,
			&item.Thumbw, &item.Thumbh, &item.Dhash)
		if err != nil {
			return result, err
		}
		result.Entries = append(result.Entries, item)
	}
	return result, entries.Err()
}
// handleRequest serves the directory listing page at "/", and delegates
// all other paths to staticHandler.
//
// The directory is selected by the optional "id" query parameter;
// when absent or empty, the root (ID 0) is shown. A malformed ID results
// in 400 Bad Request, an ID not present in the database in 404 Not Found,
// and any other database error in 500 Internal Server Error.
func handleRequest(w http.ResponseWriter, r *http.Request) {
	if r.URL.Path != "/" {
		staticHandler.ServeHTTP(w, r)
		return
	}

	var id int64
	if s := r.URL.Query().Get("id"); s != "" {
		var err error
		if id, err = strconv.ParseInt(s, 10, 64); err != nil {
			http.Error(w, "invalid directory id", http.StatusBadRequest)
			return
		}
	}

	d, err := dbCollectDirectory(id)
	if errors.Is(err, sql.ErrNoRows) {
		// The directory row itself is missing, which is the client's fault.
		http.NotFound(w, r)
		return
	} else if err != nil {
		http.Error(w, err.Error(), http.StatusInternalServerError)
		return
	}

	// Parts of the response may have been written already,
	// so template errors can only be logged.
	if err := page.Execute(w, d); err != nil {
		log.Println(err)
	}
}
// handleImages serves the file at imagePath for the SHA-1 that ends
// the request path, or responds with 404 when the path doesn't match hashRE.
func handleImages(w http.ResponseWriter, r *http.Request) {
	match := hashRE.FindStringSubmatch(r.URL.Path)
	if match == nil {
		http.NotFound(w, r)
		return
	}
	http.ServeFile(w, r, imagePath(match[1]))
}
// handleThumbs serves the file at thumbPath for the SHA-1 that ends
// the request path, or responds with 404 when the path doesn't match hashRE.
func handleThumbs(w http.ResponseWriter, r *http.Request) {
	match := hashRE.FindStringSubmatch(r.URL.Path)
	if match == nil {
		http.NotFound(w, r)
		return
	}
	http.ServeFile(w, r, thumbPath(match[1]))
}
// cmdRun runs a web UI against GD on ADDRESS.
//
// Handlers are registered on the default serve mux, the address to visit
// is logged, and the function then blocks in ListenAndServe,
// returning whatever error that ends with.
func cmdRun(args []string) error {
	if len(args) != 2 {
		return errors.New("usage: GD ADDRESS")
	}
	gd, address := args[0], args[1]
	if err := openDB(gd); err != nil {
		return err
	}

	// This separation is not strictly necessary,
	// but having an elementary level of security doesn't hurt either.
	staticHandler = http.FileServer(http.Dir("public"))

	// TODO: Make sure the database handle isn't used concurrently.
	http.HandleFunc("/", handleRequest)
	http.HandleFunc("/image/", handleImages)
	http.HandleFunc("/thumb/", handleThumbs)

	// TODO: Add a few API endpoints.

	// Log a URL to open; an unparseable address is merely reported here,
	// and left for the server to reject.
	host, port, err := net.SplitHostPort(address)
	switch {
	case err != nil:
		log.Println(err)
	case host == "":
		log.Println("http://" + net.JoinHostPort("localhost", port))
	default:
		log.Println("http://" + address)
	}

	server := &http.Server{
		Addr:           address,
		ReadTimeout:    60 * time.Second,
		WriteTimeout:   60 * time.Second,
		MaxHeaderBytes: 32 << 10,
	}
	return server.ListenAndServe()
}
// --- Import ------------------------------------------------------------------
// directoryManager resolves directory paths to IDs of rows in the "directory"
// table, creating missing rows as needed, and caching the results.
// The cache is created lazily, so the zero value is ready to use.
// It does no locking of its own.
type directoryManager struct {
cache map[string]int64 // Unix-style paths to directory.id
}
// uncachedIDForPath walks the given path components down from the root
// (a NULL parent) within tx, inserting a "directory" row for every component
// that doesn't exist yet, and returns the ID of the final component.
// An empty path yields zero.
func (dm *directoryManager) uncachedIDForPath(
	tx *sql.Tx, path []string) (int64, error) {
	var current sql.NullInt64
	for _, component := range path {
		// On success, the found ID becomes the parent for the next level.
		err := tx.QueryRow(
			`SELECT id FROM directory WHERE name = ? AND parent IS ?`,
			component, current).Scan(&current)
		switch {
		case err == nil:
			continue
		case !errors.Is(err, sql.ErrNoRows):
			return 0, err
		}

		// The component is missing, create it under the current parent.
		inserted, err := tx.Exec(
			`INSERT INTO directory(name, parent) VALUES (?, ?)`,
			component, current)
		if err != nil {
			return 0, err
		}
		id, err := inserted.LastInsertId()
		if err != nil {
			return 0, err
		}
		current = sql.NullInt64{Int64: id, Valid: true}
	}
	return current.Int64, nil
}
// IDForDirectoryPath returns the ID of the "directory" row corresponding
// to path, creating any missing rows within tx. The path is cleaned and
// converted to use forward slashes first, and that form keys the cache.
//
// NOTE(review): strings.Split does not return an empty slice for a non-empty
// separator, so the zero-length check below looks unreachable: paths that
// clean to "." or "/" appear to be stored as a component named "." or "",
// respectively. Confirm the intended handling of the root.
func (dm *directoryManager) IDForDirectoryPath(
	tx *sql.Tx, path string) (int64, error) {
	// Relative paths could be handled differently,
	// but right now, they're assumed to start at the root.
	path = filepath.ToSlash(filepath.Clean(path))
	components := strings.Split(path, "/")
	if len(components) > 1 && components[0] == "" {
		components = components[1:]
	}
	if len(components) == 0 {
		return 0, nil
	}

	if dm.cache == nil {
		dm.cache = make(map[string]int64)
	}
	if cached, found := dm.cache[path]; found {
		return cached, nil
	}

	id, err := dm.uncachedIDForPath(tx, components)
	if err != nil {
		return 0, err
	}
	dm.cache[path] = id
	return id, nil
}
// isImage reports whether xdg-mime identifies the file at path
// as having an image/* media type.
//
// An error is returned when xdg-mime cannot be run, or exits unsuccessfully.
func isImage(path string) (bool, error) {
	// Output starts the command, collects its standard output, and always
	// waits for it, so no process or pipe is leaked on any error path.
	out, err := exec.Command("xdg-mime", "query", "filetype", path).Output()
	if err != nil {
		return false, err
	}
	return bytes.HasPrefix(out, []byte("image/")), nil
}
// importer adds files to the gallery through its Import method.
type importer struct {
dm directoryManager // resolves directory paths to directory IDs
dmMutex sync.Mutex // held by Import for the duration of its transaction
}
// Import adds the file at path to the gallery.
//
// Files that isImage doesn't recognize as images are skipped silently.
// Otherwise, the file is hashed with SHA-1, a symlink to its absolute path
// is created at imagePath of that hash (an already existing one is kept),
// and within a single transaction, an "image" row is inserted for the hash
// unless present, along with an "entry" row recording the file's name,
// modification time, and hash under the directory resolved from path.
//
// The database part is serialized through dmMutex.
func (i *importer) Import(path string) error {
// The input may be a relative path, and we want to remember it as such,
// but symlinks for the images must be absolute.
absPath, err := filepath.Abs(path)
if err != nil {
return err
}
// Skip videos, which ImageMagick can process, but we don't want it to,
// so that they're not converted 1:1 to WebP.
pathIsImage, err := isImage(path)
if err != nil {
return err
}
if !pathIsImage {
return nil
}
f, err := os.Open(path)
if err != nil {
return err
}
defer f.Close()
s, err := f.Stat()
if err != nil {
return err
}
// Stream the file through the hash using a 64 KiB buffer.
hash := sha1.New()
_, err = io.CopyBuffer(hash, f, make([]byte, 65536))
if err != nil {
return err
}
hexSHA1 := hex.EncodeToString(hash.Sum(nil))
pathImage := imagePath(hexSHA1)
imageDirname, _ := filepath.Split(pathImage)
if err := os.MkdirAll(imageDirname, 0755); err != nil {
return err
}
// An existing symlink for the same hash is not an error, and is left as is.
if err := os.Symlink(absPath, pathImage); err != nil &&
!errors.Is(err, fs.ErrExist) {
return err
}
// A concurrent transaction could be aborted, yet still result in
// creating directoryManager's cache entry, therefore this scope.
// TODO: Educate self about isolation levels and reconsider.
// Perhaps get rid of the cache.
i.dmMutex.Lock()
defer i.dmMutex.Unlock()
tx, err := db.Begin()
if err != nil {
return err
}
// Undoes everything on early returns; its result is not checked.
defer tx.Rollback()
if _, err = tx.Exec(`INSERT INTO image(sha1) VALUES (?)
ON CONFLICT(sha1) DO NOTHING`, hexSHA1); err != nil {
return err
}
// The entry is stored under the path as given, not the absolute one.
dbDirname, dbBasename := filepath.Split(path)
dbParent, err := i.dm.IDForDirectoryPath(tx, dbDirname)
if err != nil {
return err
}
// FIXME: This disallows any entries directly in the root.
// TODO: Turn this into an upsert statement.
_, err = tx.Exec(`INSERT INTO entry(parent, name, mtime, sha1)
VALUES (?, ?, ?, ?)`, dbParent, dbBasename, s.ModTime().Unix(), hexSHA1)
if err != nil {
return err
}
return tx.Commit()
}
// cmdImport adds files to the "entry" table.
//
// The arguments are the gallery directory, followed by any number of roots
// to walk. All non-directory files found under the roots are imported
// in parallel, as limited by taskSemaphore. The first import error stops
// further imports from being started, and is returned once the running
// ones finish.
func cmdImport(args []string) error {
	if len(args) < 1 {
		return errors.New("usage: GD ROOT...")
	}
	if err := openDB(args[0]); err != nil {
		return err
	}

	// Make the first step collecting all the paths,
	// in order to show more useful progress information.
	var paths []string
	cb := func(path string, d fs.DirEntry, err error) error {
		if err != nil || d.IsDir() {
			return err
		}
		paths = append(paths, path)
		return nil
	}
	for _, name := range args[1:] {
		if err := filepath.WalkDir(name, cb); err != nil {
			return err
		}
	}

	pb := newProgressBar(len(paths))
	defer pb.Stop()

	i := importer{}
	ctx, cancel := context.WithCancelCause(context.Background())
	// Release the context's resources on all paths, including success.
	// This doesn't overwrite a cause recorded by a failed import.
	defer cancel(nil)

	wg := sync.WaitGroup{}
	for _, path := range paths {
		// Acquisition only fails once the context has been cancelled.
		if taskSemaphore.Acquire(ctx, 1) != nil {
			break
		}

		wg.Add(1)
		go func(path string) {
			defer taskSemaphore.Release(1)
			defer wg.Done()
			if err := i.Import(path); err != nil {
				cancel(err)
			} else {
				pb.Step()
			}
		}(path)
	}
	wg.Wait()
	if ctx.Err() != nil {
		return context.Cause(ctx)
	}
	return nil
}
// cmdSync is like import, but clears the "entry" table beforehand.
//
// So far, it only validates its arguments and opens the database.
func cmdSync(args []string) error {
	if len(args) == 0 {
		return errors.New("usage: GD ROOT...")
	}
	// TODO
	return openDB(args[0])
}
// --- Tagging -----------------------------------------------------------------
// cmdTag mass imports tags from data passed on stdin as a TSV
// of SHA1 TAG WEIGHT entries.
//
// The arguments are the gallery directory, the name of the tag space
// to import into, and optionally its description, which is only used
// when the space is being created. Everything happens within a single
// transaction, so any malformed line or database error leaves
// the database unchanged, and is returned to the caller.
func cmdTag(args []string) error {
	if len(args) < 2 || len(args) > 3 {
		return errors.New("usage: GD SPACE [DESCRIPTION]")
	}
	if err := openDB(args[0]); err != nil {
		return err
	}

	space := args[1]
	var description sql.NullString
	if len(args) >= 3 {
		description = sql.NullString{String: args[2], Valid: true}
	}

	// Note that starting as a write transaction prevents deadlocks.
	// Imports are rare, and just bulk load data, so this scope is fine.
	tx, err := db.Begin()
	if err != nil {
		return err
	}
	defer tx.Rollback()

	if _, err := tx.Exec(`INSERT OR IGNORE INTO tag_space(name, description)
VALUES (?, ?)`, space, description); err != nil {
		return err
	}

	var spaceID int64
	if err := tx.QueryRow(`SELECT id FROM tag_space WHERE name = ?`,
		space).Scan(&spaceID); err != nil {
		return err
	}

	// TODO: Prepare statements for tag/assignment updates.
	scanner := bufio.NewScanner(os.Stdin)
	for scanner.Scan() {
		fields := strings.Split(scanner.Text(), "\t")
		if len(fields) != 3 {
			return errors.New("invalid input format")
		}

		hexSHA1, tag := fields[0], fields[1]
		weight, err := strconv.ParseFloat(fields[2], 64)
		if err != nil {
			return err
		}

		// This must propagate the error: returning nil here would report
		// success while the deferred rollback throws the whole import away.
		if _, err := tx.Exec(`INSERT OR IGNORE INTO tag(space, name)
VALUES (?, ?);`, spaceID, tag); err != nil {
			return err
		}

		var tagID int64
		if err := tx.QueryRow(`SELECT id FROM tag WHERE space = ? AND name = ?`,
			spaceID, tag).Scan(&tagID); err != nil {
			return err
		}

		if _, err := tx.Exec(`INSERT INTO tag_assignment(sha1, tag, weight)
VALUES (?, ?, ?) ON CONFLICT DO UPDATE SET weight = ?`,
			hexSHA1, tagID, weight, weight); err != nil {
			return err
		}
	}
	if err := scanner.Err(); err != nil {
		return err
	}
	return tx.Commit()
}
// --- Check -------------------------------------------------------------------
// cmdCheck checks if all files tracked in the DB are accessible.
//
// So far, it only validates its arguments and opens the database.
func cmdCheck(args []string) error {
	if len(args) != 1 {
		return errors.New("usage: GD")
	}
	// TODO: Check if all hashes of DB entries have a statable image file,
	// and that all images with thumb{w,h} have a thumbnail file. Perhaps.
	return openDB(args[0])
}
// --- Thumbnailing ------------------------------------------------------------
// makeThumbnail converts the image at pathImage to a thumbnail written
// to pathThumb, creating the thumbnail's directory as necessary.
//
// It returns the width and height that the converter reports for
// the thumbnail. On any error, both dimensions are returned as zero.
func makeThumbnail(pathImage, pathThumb string) (int, int, error) {
	thumbDirname, _ := filepath.Split(pathThumb)
	if err := os.MkdirAll(thumbDirname, 0755); err != nil {
		return 0, 0, err
	}

	// Create a normalized thumbnail. Since we don't particularly need
	// any complex processing (metadata gets stripped),
	// simply push it through ImageMagick.
	//
	// - http://www.ericbrasseur.org/gamma.html
	// - https://www.imagemagick.org/Usage/thumbnails/
	// - https://imagemagick.org/script/command-line-options.php#layers
	//
	// "info:" output is written for each frame, which is why we delete
	// all of them but the first one beforehand.
	//
	// TODO: See if we can optimize resulting WebP animations.
	// (Do -layers optimize* apply to this format at all?)
	cmd := exec.Command("convert", "-limit", "thread", "1", pathImage,
		"-coalesce", "-colorspace", "RGB", "-auto-orient", "-strip",
		"-resize", "256x128>", "-colorspace", "sRGB",
		"-format", "%w %h", "+write", pathThumb, "-delete", "1--1", "info:")

	// Output starts the command, collects its standard output, and always
	// waits for it, so no process or pipe is leaked on any error path.
	out, err := cmd.Output()
	if err != nil {
		return 0, 0, err
	}

	var w, h int
	if _, err := fmt.Fscanf(
		bytes.NewReader(out), "%d %d", &w, &h); err != nil {
		return 0, 0, err
	}
	return w, h, nil
}
func makeThumbnailFor(sha1 string) error {
pathImage := imagePath(sha1)
pathThumb := thumbPath(sha1)
w, h, err := makeThumbnail(pathImage, pathThumb)
if err != nil {
return err
}
_, err = db.Exec(`UPDATE image SET thumbw = ?, thumbh = ?
WHERE sha1 = ?`, w, h, sha1)
return err
}
// cmdThumbnail generates missing thumbnails, in parallel.
//
// The arguments are the gallery directory, optionally followed by SHA-1
// hashes of the images to process. When no hashes are given, all images
// in the database lacking thumbnail dimensions are processed.
// Parallelism is limited by taskSemaphore. The first error stops further
// thumbnails from being started, and is returned once the running ones
// finish.
func cmdThumbnail(args []string) error {
	if len(args) < 1 {
		return errors.New("usage: GD [SHA1...]")
	}
	if err := openDB(args[0]); err != nil {
		return err
	}

	hexSHA1 := args[1:]
	if len(hexSHA1) == 0 {
		// Get all unique images in the database with no thumbnail.
		var err error
		hexSHA1, err = dbCollectStrings(`SELECT sha1 FROM image
WHERE thumbw IS NULL OR thumbh IS NULL`)
		if err != nil {
			return err
		}
	}

	pb := newProgressBar(len(hexSHA1))
	defer pb.Stop()

	ctx, cancel := context.WithCancelCause(context.Background())
	// Release the context's resources on all paths, including success.
	// This doesn't overwrite a cause recorded by a failed task.
	defer cancel(nil)

	wg := sync.WaitGroup{}
	for _, sha1 := range hexSHA1 {
		// Acquisition only fails once the context has been cancelled.
		if taskSemaphore.Acquire(ctx, 1) != nil {
			break
		}

		wg.Add(1)
		go func(sha1 string) {
			defer taskSemaphore.Release(1)
			defer wg.Done()
			if err := makeThumbnailFor(sha1); err != nil {
				cancel(err)
			} else {
				pb.Step()
			}
		}(sha1)
	}
	wg.Wait()
	if ctx.Err() != nil {
		return context.Cause(ctx)
	}
	return nil
}
// --- Perceptual hash ---------------------------------------------------------
// makeDhash runs the hasher program with pathThumb as its only argument,
// and parses the hexadecimal number it prints on standard output.
//
// An error is returned when the hasher cannot be run, exits unsuccessfully,
// or prints something unparseable; the returned hash is then zero.
func makeDhash(hasher, pathThumb string) (uint64, error) {
	// Output starts the command, collects its standard output, and always
	// waits for it, so no process or pipe is leaked on any error path.
	out, err := exec.Command(hasher, pathThumb).Output()
	if err != nil {
		return 0, err
	}

	var hash uint64
	if _, err := fmt.Fscanf(bytes.NewReader(out), "%x", &hash); err != nil {
		return 0, err
	}
	return hash, nil
}
// cmdDhash generates perceptual hash from thumbnails.
//
// The arguments are the gallery directory, the hasher program to run,
// and optionally SHA-1 hashes of the images to process. When no hashes
// are given, all images in the database without a dhash are processed.
// Processing is sequential, and stops at the first error.
func cmdDhash(args []string) error {
	// Both GD and HASHER are mandatory; requiring only one of them
	// would make the indexing below panic on a missing HASHER.
	if len(args) < 2 {
		return errors.New("usage: GD HASHER [SHA1...]")
	}
	if err := openDB(args[0]); err != nil {
		return err
	}

	hasher, hexSHA1 := args[1], args[2:]
	if len(hexSHA1) == 0 {
		var err error
		hexSHA1, err = dbCollectStrings(`
SELECT sha1 FROM image WHERE dhash IS NULL`)
		if err != nil {
			return err
		}
	}

	pb := newProgressBar(len(hexSHA1))
	defer pb.Stop()

	// TODO: Also run the hasher in parallel, once it becomes a problem.
	// And/or run it in batches, since start-up time of the hasher
	// poses considerable overhead with large amounts of images.
	for _, sha1 := range hexSHA1 {
		pathThumb := thumbPath(sha1)
		hash, err := makeDhash(hasher, pathThumb)
		if err != nil {
			return err
		}

		// The hash is stored reinterpreted as a signed 64-bit integer.
		_, err = db.Exec(`UPDATE image SET dhash = ? WHERE sha1 = ?`,
			int64(hash), sha1)
		if err != nil {
			return err
		}
		pb.Step()
	}
	return nil
}
// --- Main --------------------------------------------------------------------
// commands maps subcommand names, as given in the first program argument,
// to their handlers. Handlers receive the remaining program arguments,
// and main reports any error they return.
var commands = map[string]struct {
handler func(args []string) error
}{
"init": {cmdInit},
"run": {cmdRun},
"import": {cmdImport},
"tag": {cmdTag},
"sync": {cmdSync},
"check": {cmdCheck},
"thumbnail": {cmdThumbnail},
"dhash": {cmdDhash},
}
// main dispatches to the subcommand named by the first program argument,
// passing it the remaining arguments; at least one of those is required.
// A failure of the subcommand is logged and terminates the program,
// after the database has been closed.
func main() {
	if len(os.Args) <= 2 {
		log.Fatalln("Missing arguments")
	}

	name, args := os.Args[1], os.Args[2:]
	cmd, ok := commands[name]
	if !ok {
		log.Fatalln("Unknown command: " + name)
	}

	taskSemaphore = semaphore.NewWeighted(int64(runtime.NumCPU()))
	err := cmd.handler(args)

	// Close the database explicitly, so that any errors coming
	// from there get printed, before acting on the handler's result.
	if db != nil {
		if closeErr := db.Close(); closeErr != nil {
			log.Println(closeErr)
		}
	}
	if err != nil {
		log.Fatalln(err)
	}
}