package main import ( "bufio" "bytes" "context" "crypto/sha1" "database/sql" "encoding/hex" "errors" "fmt" "html/template" "io" "io/fs" "log" "net" "net/http" "os" "os/exec" "path/filepath" "regexp" "runtime" "strconv" "strings" "sync" "time" _ "github.com/mattn/go-sqlite3" "golang.org/x/sync/semaphore" ) var ( db *sql.DB // sqlite database galleryDirectory string // gallery directory // taskSemaphore limits parallel computations. taskSemaphore *semaphore.Weighted ) func openDB(directory string) error { var err error db, err = sql.Open("sqlite3", "file:"+filepath.Join(directory, "gallery.db?_foreign_keys=1&_busy_timeout=1000")) galleryDirectory = directory return err } func imagePath(sha1 string) string { return filepath.Join(galleryDirectory, "images", sha1[:2], sha1) } func thumbPath(sha1 string) string { return filepath.Join(galleryDirectory, "thumbs", sha1[:2], sha1+".webp") } func dbCollectStrings(query string) ([]string, error) { rows, err := db.Query(query) if err != nil { return nil, err } defer rows.Close() var result []string for rows.Next() { var s string if err := rows.Scan(&s); err != nil { return nil, err } result = append(result, s) } if err := rows.Err(); err != nil { return nil, err } return result, nil } // --- Progress bar ------------------------------------------------------------ type progressBar struct { mutex sync.Mutex current int target int } func newProgressBar(target int) *progressBar { pb := &progressBar{current: 0, target: target} pb.update() return pb } func (pb *progressBar) Stop() { // The minimum thing that works: just print a newline. os.Stdout.WriteString("\n") } func (pb *progressBar) update() { target := fmt.Sprintf("%d", pb.target) fmt.Printf("\r%*d/%s (%2d%%)", len(target), pb.current, target, int(float32(pb.current)/float32(pb.target)*100)) } func (pb *progressBar) Step() { pb.mutex.Lock() defer pb.mutex.Unlock() pb.current++ pb.update() } // --- Initialization ---------------------------------------------------------- // cmdInit initializes a "gallery directory" that contains gallery.sqlite, // images, thumbs. func cmdInit(args []string) error { if len(args) != 1 { return errors.New("usage: GD") } if err := openDB(args[0]); err != nil { return err } if _, err := db.Exec(initializeSQL); err != nil { return err } // XXX: There's technically no reason to keep images as symlinks, // we might just keep absolute paths in the database as well. if err := os.MkdirAll( filepath.Join(galleryDirectory, "images"), 0755); err != nil { return err } if err := os.MkdirAll( filepath.Join(galleryDirectory, "thumbs"), 0755); err != nil { return err } return nil } // --- Web --------------------------------------------------------------------- var hashRE = regexp.MustCompile(`^/.*?/([0-9a-f]{40})$`) var staticHandler http.Handler var page = template.Must(template.New("/").Parse(` Gallery

{{ .Name }}

{{ range .Entries }} {{ end }} `)) // XXX: This is preliminary. type entry struct { Parent int64 Name string Mtime int64 Sha1 string Thumbw int Thumbh int Dhash int64 } // XXX: This is preliminary. type directory struct { Id int64 Name string Parent int64 Children []int64 Entries []entry } func dbCollectDirectory(id int64) (directory, error) { d := directory{Id: id} dbID := sql.NullInt64{Int64: id, Valid: id != 0} if id != 0 { err := db.QueryRow(`SELECT name, IFNULL(parent, 0) FROM directory WHERE id IS ?`, dbID).Scan(&d.Name, &d.Parent) if err != nil { return d, err } } rows1, err := db.Query(`SELECT id FROM directory WHERE parent IS ?`, dbID) if err != nil { return d, err } defer rows1.Close() for rows1.Next() { var child int64 if err := rows1.Scan(&child); err != nil { return d, err } d.Children = append(d.Children, child) } if err := rows1.Err(); err != nil { return d, err } rows2, err := db.Query(`SELECT IFNULL(entry.parent, 0), entry.name, entry.mtime, entry.sha1, IFNULL(image.thumbw, 0), IFNULL(image.thumbh, 0), IFNULL(image.dhash, 0) FROM entry JOIN image ON entry.sha1 = image.sha1 WHERE entry.parent IS ?`, dbID) if err != nil { return d, err } defer rows2.Close() for rows2.Next() { var e entry if err := rows2.Scan(&e.Parent, &e.Name, &e.Mtime, &e.Sha1, &e.Thumbw, &e.Thumbh, &e.Dhash); err != nil { return d, err } d.Entries = append(d.Entries, e) } return d, rows2.Err() } func handleRequest(w http.ResponseWriter, r *http.Request) { if r.URL.Path != "/" { staticHandler.ServeHTTP(w, r) return } id, _ := strconv.ParseInt(r.URL.Query().Get("id"), 10, 64) d, err := dbCollectDirectory(id) if err != nil { http.Error(w, err.Error(), 500) return } if err := page.Execute(w, d); err != nil { log.Println(err) } } func handleImages(w http.ResponseWriter, r *http.Request) { if m := hashRE.FindStringSubmatch(r.URL.Path); m == nil { http.NotFound(w, r) } else { http.ServeFile(w, r, imagePath(m[1])) } } func handleThumbs(w http.ResponseWriter, r *http.Request) { if m := hashRE.FindStringSubmatch(r.URL.Path); m == nil { http.NotFound(w, r) } else { http.ServeFile(w, r, thumbPath(m[1])) } } // cmdRun runs a web UI against GD on ADDRESS. func cmdRun(args []string) error { if len(args) != 2 { return errors.New("usage: GD ADDRESS") } if err := openDB(args[0]); err != nil { return err } address := args[1] // This separation is not strictly necessary, // but having an elementary level of security doesn't hurt either. staticHandler = http.FileServer(http.Dir("public")) // TODO: Make sure the database handle isn't used concurrently. http.HandleFunc("/", handleRequest) http.HandleFunc("/image/", handleImages) http.HandleFunc("/thumb/", handleThumbs) // TODO: Add a few API endpoints. host, port, err := net.SplitHostPort(address) if err != nil { log.Println(err) } else if host == "" { log.Println("http://" + net.JoinHostPort("localhost", port)) } else { log.Println("http://" + address) } s := &http.Server{ Addr: address, ReadTimeout: 60 * time.Second, WriteTimeout: 60 * time.Second, MaxHeaderBytes: 32 << 10, } return s.ListenAndServe() } // --- Import ------------------------------------------------------------------ type directoryManager struct { cache map[string]int64 // Unix-style paths to directory.id } func (dm *directoryManager) uncachedIDForPath( tx *sql.Tx, path []string) (int64, error) { var parent sql.NullInt64 for _, name := range path { if err := tx.QueryRow( `SELECT id FROM directory WHERE name = ? AND parent IS ?`, name, parent).Scan(&parent); err == nil { continue } else if !errors.Is(err, sql.ErrNoRows) { return 0, err } if result, err := tx.Exec( `INSERT INTO directory(name, parent) VALUES (?, ?)`, name, parent); err != nil { return 0, err } else if id, err := result.LastInsertId(); err != nil { return 0, err } else { parent = sql.NullInt64{Int64: id, Valid: true} } } return parent.Int64, nil } func (dm *directoryManager) IDForDirectoryPath( tx *sql.Tx, path string) (int64, error) { // Relative paths could be handled differently, // but right now, they're assumed to start at the root. path = filepath.ToSlash(filepath.Clean(path)) list := strings.Split(path, "/") if len(list) > 1 && list[0] == "" { list = list[1:] } if len(list) == 0 { return 0, nil } if dm.cache == nil { dm.cache = make(map[string]int64) } else if id, ok := dm.cache[path]; ok { return id, nil } id, err := dm.uncachedIDForPath(tx, list) if err != nil { return 0, err } dm.cache[path] = id return id, nil } func isImage(path string) (bool, error) { cmd := exec.Command("xdg-mime", "query", "filetype", path) // XXX: Early returns may leak resources. stdout, err := cmd.StdoutPipe() if err != nil { return false, err } if err := cmd.Start(); err != nil { return false, err } out, err := io.ReadAll(stdout) if err != nil { return false, err } if err := cmd.Wait(); err != nil { return false, err } return bytes.HasPrefix(out, []byte("image/")), nil } type importer struct { dm directoryManager dmMutex sync.Mutex } func (i *importer) Import(path string) error { // The input may be a relative path, and we want to remember it as such, // but symlinks for the images must be absolute. absPath, err := filepath.Abs(path) if err != nil { return err } // Skip videos, which ImageMagick can process, but we don't want it to, // so that they're not converted 1:1 to WebP. pathIsImage, err := isImage(path) if err != nil { return err } if !pathIsImage { return nil } f, err := os.Open(path) if err != nil { return err } defer f.Close() s, err := f.Stat() if err != nil { return err } hash := sha1.New() _, err = io.CopyBuffer(hash, f, make([]byte, 65536)) if err != nil { return err } hexSHA1 := hex.EncodeToString(hash.Sum(nil)) pathImage := imagePath(hexSHA1) imageDirname, _ := filepath.Split(pathImage) if err := os.MkdirAll(imageDirname, 0755); err != nil { return err } if err := os.Symlink(absPath, pathImage); err != nil && !errors.Is(err, fs.ErrExist) { return err } // We can't multiplex transactions on a single connection, // and the directoryManager isn't thread-safe. i.dmMutex.Lock() defer i.dmMutex.Unlock() tx, err := db.Begin() if err != nil { return err } defer tx.Rollback() if _, err = tx.Exec(`INSERT INTO image(sha1) VALUES (?) ON CONFLICT(sha1) DO NOTHING`, hexSHA1); err != nil { return err } // XXX: The directoryManager's cache is questionable here, // if only because it keeps entries even when transactions fail. dbDirname, dbBasename := filepath.Split(path) dbParent, err := i.dm.IDForDirectoryPath(tx, dbDirname) if err != nil { return err } // FIXME: This disallows any entries directly in the root. _, err = tx.Exec(`INSERT INTO entry(parent, name, mtime, sha1) VALUES (?, ?, ?, ?) ON CONFLICT DO UPDATE SET mtime = ?, sha1 = ?`, dbParent, dbBasename, s.ModTime().Unix(), hexSHA1, s.ModTime().Unix(), hexSHA1) if err != nil { return err } return tx.Commit() } // cmdImport adds files to the "entry" table. func cmdImport(args []string) error { if len(args) < 1 { return errors.New("usage: GD ROOT...") } if err := openDB(args[0]); err != nil { return err } // Make the first step collecting all the paths, // in order to show more useful progress information. paths := []string{} cb := func(path string, d fs.DirEntry, err error) error { if err != nil || d.IsDir() { return err } paths = append(paths, path) return nil } for _, name := range args[1:] { if err := filepath.WalkDir(name, cb); err != nil { return err } } pb := newProgressBar(len(paths)) defer pb.Stop() i := importer{} ctx, cancel := context.WithCancelCause(context.Background()) wg := sync.WaitGroup{} for _, path := range paths { if taskSemaphore.Acquire(ctx, 1) != nil { break } wg.Add(1) go func(path string) { defer taskSemaphore.Release(1) defer wg.Done() if err := i.Import(path); err != nil { cancel(err) } else { pb.Step() } }(path) } wg.Wait() if ctx.Err() != nil { return context.Cause(ctx) } return nil } // cmdSync is like import, but clears the "entry" table beforehands. func cmdSync(args []string) error { if len(args) < 1 { return errors.New("usage: GD ROOT...") } if err := openDB(args[0]); err != nil { return err } // TODO: Should this run in a transaction? return nil } // --- Tagging ----------------------------------------------------------------- // cmdTag mass imports tags from data passed on stdin as a TSV // of SHA1 TAG WEIGHT entries. func cmdTag(args []string) error { if len(args) < 2 || len(args) > 3 { return errors.New("usage: GD SPACE [DESCRIPTION]") } if err := openDB(args[0]); err != nil { return err } space := args[1] var description sql.NullString if len(args) >= 3 { description = sql.NullString{String: args[2], Valid: true} } // Note that starting as a write transaction prevents deadlocks. // Imports are rare, and just bulk load data, so this scope is fine. tx, err := db.Begin() if err != nil { return err } defer tx.Rollback() if _, err := tx.Exec(`INSERT OR IGNORE INTO tag_space(name, description) VALUES (?, ?)`, space, description); err != nil { return err } var spaceID int64 if err := tx.QueryRow(`SELECT id FROM tag_space WHERE name = ?`, space).Scan(&spaceID); err != nil { return err } // XXX: It might make sense to pre-erase all tag assignments within // the given space for that image, the first time we see it: // // DELETE FROM tag_assignment // WHERE sha1 = ? AND tag IN (SELECT id FROM tag WHERE space = ?) // // or even just clear the tag space completely: // // DELETE FROM tag_assignment // WHERE tag IN (SELECT id FROM tag WHERE space = ?); // DELETE FROM tag WHERE space = ?; stmt, err := tx.Prepare(`INSERT INTO tag_assignment(sha1, tag, weight) VALUES (?, (SELECT id FROM tag WHERE space = ? AND name = ?), ?) ON CONFLICT DO UPDATE SET weight = ?`) if err != nil { return err } scanner := bufio.NewScanner(os.Stdin) for scanner.Scan() { fields := strings.Split(scanner.Text(), "\t") if len(fields) != 3 { return errors.New("invalid input format") } sha1, tag := fields[0], fields[1] weight, err := strconv.ParseFloat(fields[2], 64) if err != nil { return err } if _, err := tx.Exec( `INSERT OR IGNORE INTO tag(space, name) VALUES (?, ?);`, spaceID, tag); err != nil { return nil } if _, err := stmt.Exec(sha1, spaceID, tag, weight, weight); err != nil { return err } } if err := scanner.Err(); err != nil { return err } return tx.Commit() } // --- Check ------------------------------------------------------------------- // cmdCheck checks if all files tracked in the DB are accessible. func cmdCheck(args []string) error { if len(args) != 1 { return errors.New("usage: GD") } if err := openDB(args[0]); err != nil { return err } // TODO: Check if all hashes of DB entries have a statable image file, // and that all images with thumb{w,h} have a thumbnail file. Perhaps. return nil } // --- Thumbnailing ------------------------------------------------------------ func makeThumbnail(pathImage, pathThumb string) (int, int, error) { thumbDirname, _ := filepath.Split(pathThumb) if err := os.MkdirAll(thumbDirname, 0755); err != nil { return 0, 0, err } // Create a normalized thumbnail. Since we don't particularly need // any complex processing, such as surrounding of metadata, // simply push it through ImageMagick. // // - http://www.ericbrasseur.org/gamma.html // - https://www.imagemagick.org/Usage/thumbnails/ // - https://imagemagick.org/script/command-line-options.php#layers // // "info:" output is written for each frame, which is why we delete // all of them but the first one beforehands. // // TODO: See if we can optimize resulting WebP animations. // (Do -layers optimize* apply to this format at all?) cmd := exec.Command("convert", "-limit", "thread", "1", pathImage, "-coalesce", "-colorspace", "RGB", "-auto-orient", "-strip", "-resize", "256x128>", "-colorspace", "sRGB", "-format", "%w %h", "+write", pathThumb, "-delete", "1--1", "info:") // XXX: Early returns may leak resources. stdout, err := cmd.StdoutPipe() if err != nil { return 0, 0, err } if err := cmd.Start(); err != nil { return 0, 0, err } out, err := io.ReadAll(stdout) if err != nil { return 0, 0, err } if err := cmd.Wait(); err != nil { return 0, 0, err } var w, h int _, err = fmt.Fscanf(bytes.NewReader(out), "%d %d", &w, &h) return w, h, err } func makeThumbnailFor(sha1 string) error { pathImage := imagePath(sha1) pathThumb := thumbPath(sha1) w, h, err := makeThumbnail(pathImage, pathThumb) if err != nil { return err } _, err = db.Exec(`UPDATE image SET thumbw = ?, thumbh = ? WHERE sha1 = ?`, w, h, sha1) return err } // cmdThumbnail generates missing thumbnails, in parallel. func cmdThumbnail(args []string) error { if len(args) < 1 { return errors.New("usage: GD [SHA1...]") } if err := openDB(args[0]); err != nil { return err } hexSHA1 := args[1:] if len(hexSHA1) == 0 { // Get all unique images in the database with no thumbnail. var err error hexSHA1, err = dbCollectStrings(`SELECT sha1 FROM image WHERE thumbw IS NULL OR thumbh IS NULL`) if err != nil { return err } } pb := newProgressBar(len(hexSHA1)) defer pb.Stop() ctx, cancel := context.WithCancelCause(context.Background()) wg := sync.WaitGroup{} for _, sha1 := range hexSHA1 { if taskSemaphore.Acquire(ctx, 1) != nil { break } wg.Add(1) go func(sha1 string) { defer taskSemaphore.Release(1) defer wg.Done() if err := makeThumbnailFor(sha1); err != nil { cancel(err) } else { pb.Step() } }(sha1) } wg.Wait() if ctx.Err() != nil { return context.Cause(ctx) } return nil } // --- Perceptual hash --------------------------------------------------------- func makeDhash(hasher, pathThumb string) (uint64, error) { cmd := exec.Command(hasher, pathThumb) // XXX: Early returns may leak resources. stdout, err := cmd.StdoutPipe() if err != nil { return 0, err } if err := cmd.Start(); err != nil { return 0, err } out, err := io.ReadAll(stdout) if err != nil { return 0, err } if err := cmd.Wait(); err != nil { return 0, err } var hash uint64 _, err = fmt.Fscanf(bytes.NewReader(out), "%x", &hash) return hash, err } // cmdDhash generates perceptual hash from thumbnails. func cmdDhash(args []string) error { if len(args) < 1 { return errors.New("usage: GD HASHER [SHA1...]") } if err := openDB(args[0]); err != nil { return err } hasher, hexSHA1 := args[1], args[2:] if len(hexSHA1) == 0 { var err error hexSHA1, err = dbCollectStrings(` SELECT sha1 FROM image WHERE dhash IS NULL`) if err != nil { return err } } pb := newProgressBar(len(hexSHA1)) defer pb.Stop() // TODO: Also run the hasher in parallel, once it becomes a problem. // And/or run it in batches, since start-up time of the hasher // poses considerable overhead with large amounts of images. for _, sha1 := range hexSHA1 { pathThumb := thumbPath(sha1) hash, err := makeDhash(hasher, pathThumb) if err != nil { return err } _, err = db.Exec(`UPDATE image SET dhash = ? WHERE sha1 = ?`, int64(hash), sha1) if err != nil { return err } pb.Step() } return nil } // --- Main -------------------------------------------------------------------- var commands = map[string]struct { handler func(args []string) error }{ "init": {cmdInit}, "run": {cmdRun}, "import": {cmdImport}, "tag": {cmdTag}, "sync": {cmdSync}, "check": {cmdCheck}, "thumbnail": {cmdThumbnail}, "dhash": {cmdDhash}, } func main() { if len(os.Args) <= 2 { log.Fatalln("Missing arguments") } cmd, ok := commands[os.Args[1]] if !ok { log.Fatalln("Unknown command: " + os.Args[1]) } taskSemaphore = semaphore.NewWeighted(int64(runtime.NumCPU())) err := cmd.handler(os.Args[2:]) // Note that the database object has a closing finalizer, // we just additionally print any errors coming from there. if db != nil { if err := db.Close(); err != nil { log.Println(err) } } if err != nil { log.Fatalln(err) } }