package main import ( "bufio" "bytes" "context" "crypto/sha1" "database/sql" "encoding/hex" "encoding/json" "errors" "fmt" "html/template" "io" "io/fs" "log" "math/bits" "net" "net/http" "os" "os/exec" "path/filepath" "regexp" "runtime" "slices" "strconv" "strings" "sync" "time" "github.com/mattn/go-sqlite3" "golang.org/x/sync/semaphore" ) var ( db *sql.DB // sqlite database galleryDirectory string // gallery directory // taskSemaphore limits parallel computations. taskSemaphore *semaphore.Weighted ) func hammingDistance(a, b int64) int { return bits.OnesCount64(uint64(a) ^ uint64(b)) } func init() { sql.Register("sqlite3_custom", &sqlite3.SQLiteDriver{ ConnectHook: func(conn *sqlite3.SQLiteConn) error { return conn.RegisterFunc("hamming", hammingDistance, true) }, }) } func openDB(directory string) error { var err error db, err = sql.Open("sqlite3_custom", "file:"+filepath.Join(directory, "gallery.db?_foreign_keys=1&_busy_timeout=1000")) galleryDirectory = directory return err } func imagePath(sha1 string) string { return filepath.Join(galleryDirectory, "images", sha1[:2], sha1) } func thumbPath(sha1 string) string { return filepath.Join(galleryDirectory, "thumbs", sha1[:2], sha1+".webp") } func dbCollectStrings(query string) ([]string, error) { rows, err := db.Query(query) if err != nil { return nil, err } defer rows.Close() var result []string for rows.Next() { var s string if err := rows.Scan(&s); err != nil { return nil, err } result = append(result, s) } if err := rows.Err(); err != nil { return nil, err } return result, nil } // --- Progress bar ------------------------------------------------------------ type progressBar struct { mutex sync.Mutex current int target int } func newProgressBar(target int) *progressBar { pb := &progressBar{current: 0, target: target} pb.update() return pb } func (pb *progressBar) Stop() { // The minimum thing that works: just print a newline. os.Stdout.WriteString("\n") } func (pb *progressBar) update() { target := fmt.Sprintf("%d", pb.target) fmt.Printf("\r%*d/%s (%2d%%)", len(target), pb.current, target, int(float32(pb.current)/float32(pb.target)*100)) } func (pb *progressBar) Step() { pb.mutex.Lock() defer pb.mutex.Unlock() pb.current++ pb.update() } // --- Initialization ---------------------------------------------------------- // cmdInit initializes a "gallery directory" that contains gallery.sqlite, // images, thumbs. func cmdInit(args []string) error { if len(args) != 1 { return errors.New("usage: GD") } if err := openDB(args[0]); err != nil { return err } if _, err := db.Exec(initializeSQL); err != nil { return err } // XXX: There's technically no reason to keep images as symlinks, // we might just keep absolute paths in the database as well. if err := os.MkdirAll( filepath.Join(galleryDirectory, "images"), 0755); err != nil { return err } if err := os.MkdirAll( filepath.Join(galleryDirectory, "thumbs"), 0755); err != nil { return err } return nil } // --- Web --------------------------------------------------------------------- var hashRE = regexp.MustCompile(`^/.*?/([0-9a-f]{40})$`) var staticHandler http.Handler var page = template.Must(template.New("/").Parse(` Gallery `)) func handleRequest(w http.ResponseWriter, r *http.Request) { if r.URL.Path != "/" { staticHandler.ServeHTTP(w, r) return } if err := page.Execute(w, nil); err != nil { log.Println(err) } } func handleImages(w http.ResponseWriter, r *http.Request) { if m := hashRE.FindStringSubmatch(r.URL.Path); m == nil { http.NotFound(w, r) } else { http.ServeFile(w, r, imagePath(m[1])) } } func handleThumbs(w http.ResponseWriter, r *http.Request) { if m := hashRE.FindStringSubmatch(r.URL.Path); m == nil { http.NotFound(w, r) } else { http.ServeFile(w, r, thumbPath(m[1])) } } // - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - func getSubdirectories(tx *sql.Tx, parent int64) (names []string, err error) { // TODO: This is like dbCollectStrings(), just needs an argument. rows, err := tx.Query( `SELECT name FROM directory WHERE IFNULL(parent, 0) = ?`, parent) if err != nil { return nil, err } defer rows.Close() names = []string{} for rows.Next() { var name string if err := rows.Scan(&name); err != nil { return nil, err } names = append(names, name) } return names, rows.Err() } type webEntry struct { SHA1 string `json:"sha1"` Name string `json:"name"` Modified int64 `json:"modified"` ThumbW int64 `json:"thumbW"` ThumbH int64 `json:"thumbH"` } func getSubentries(tx *sql.Tx, parent int64) (entries []webEntry, err error) { rows, err := tx.Query(` SELECT i.sha1, e.name, e.mtime, IFNULL(i.thumbw, 0), IFNULL(i.thumbh, 0) FROM entry AS e JOIN image AS i ON e.sha1 = i.sha1 WHERE e.parent = ?`, parent) if err != nil { return nil, err } defer rows.Close() entries = []webEntry{} for rows.Next() { var e webEntry if err := rows.Scan( &e.SHA1, &e.Name, &e.Modified, &e.ThumbW, &e.ThumbH); err != nil { return nil, err } entries = append(entries, e) } return entries, rows.Err() } func handleAPIBrowse(w http.ResponseWriter, r *http.Request) { var params struct { Path string } if err := json.NewDecoder(r.Body).Decode(¶ms); err != nil { http.Error(w, err.Error(), http.StatusBadRequest) return } var result struct { Subdirectories []string `json:"subdirectories"` Entries []webEntry `json:"entries"` } tx, err := db.Begin() if err != nil { http.Error(w, err.Error(), http.StatusInternalServerError) return } defer tx.Rollback() parent, err := idForPath(tx, decodeWebPath(params.Path), false) if err != nil { http.Error(w, err.Error(), http.StatusNotFound) return } result.Subdirectories, err = getSubdirectories(tx, parent) if err != nil { http.Error(w, err.Error(), http.StatusInternalServerError) return } result.Entries, err = getSubentries(tx, parent) if err != nil { http.Error(w, err.Error(), http.StatusInternalServerError) return } if err := json.NewEncoder(w).Encode(result); err != nil { log.Println(err) } } // - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - func getImageDimensions(sha1 string) (w int64, h int64, err error) { err = db.QueryRow(`SELECT width, height FROM image WHERE sha1 = ?`, sha1).Scan(&w, &h) return } func getImagePaths(sha1 string) (paths []string, err error) { rows, err := db.Query(`WITH RECURSIVE paths(parent, path) AS ( SELECT parent, name AS path FROM entry WHERE sha1 = ? UNION ALL SELECT d.parent, d.name || '/' || p.path FROM directory AS d JOIN paths AS p ON d.id = p.parent ) SELECT path FROM paths WHERE parent IS NULL`, sha1) if err != nil { return nil, err } defer rows.Close() paths = []string{} for rows.Next() { var path string if err := rows.Scan(&path); err != nil { return nil, err } paths = append(paths, path) } return paths, rows.Err() } func getImageTags(sha1 string) (map[string]map[string]float32, error) { rows, err := db.Query(` SELECT ts.name, t.name, ta.weight FROM tag_assignment AS ta JOIN tag AS t ON t.id = ta.tag JOIN tag_space AS ts ON ts.id = t.space WHERE ta.sha1 = ?`, sha1) if err != nil { return nil, err } defer rows.Close() result := make(map[string]map[string]float32) for rows.Next() { var ( space, tag string weight float32 ) if err := rows.Scan(&space, &tag, &weight); err != nil { return nil, err } tags := result[space] if tags == nil { tags = make(map[string]float32) result[space] = tags } tags[tag] = weight } return result, rows.Err() } func handleAPIInfo(w http.ResponseWriter, r *http.Request) { var params struct { SHA1 string } if err := json.NewDecoder(r.Body).Decode(¶ms); err != nil { http.Error(w, err.Error(), http.StatusBadRequest) return } var result struct { Width int64 `json:"width"` Height int64 `json:"height"` Paths []string `json:"paths"` Tags map[string]map[string]float32 `json:"tags"` } var err error result.Width, result.Height, err = getImageDimensions(params.SHA1) if err != nil { http.Error(w, err.Error(), http.StatusInternalServerError) return } result.Paths, err = getImagePaths(params.SHA1) if err != nil { http.Error(w, err.Error(), http.StatusInternalServerError) return } result.Tags, err = getImageTags(params.SHA1) if err != nil { http.Error(w, err.Error(), http.StatusInternalServerError) return } if err := json.NewEncoder(w).Encode(result); err != nil { log.Println(err) } } // - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - type webSimilarImage struct { SHA1 string `json:"sha1"` PixelsRatio float32 `json:"pixelsRatio"` ThumbW int64 `json:"thumbW"` ThumbH int64 `json:"thumbH"` Paths []string `json:"paths"` } func getSimilar(sha1 string, pixels int64, distance int) ( result []webSimilarImage, err error) { // For distance ∈ {0, 1}, this query is quite inefficient. // In exchange, it's generic. // // If there's a dhash, there should also be thumbnail dimensions, // so not bothering with IFNULL on them. rows, err := db.Query(`SELECT sha1, width * height, thumbw, thumbh FROM image WHERE hamming(dhash, (SELECT dhash FROM image WHERE sha1 = ?)) = ? AND sha1 <> ?`, sha1, distance, sha1) if err != nil { return nil, err } defer rows.Close() result = []webSimilarImage{} for rows.Next() { var ( match webSimilarImage matchPixels int64 ) if err = rows.Scan(&match.SHA1, &matchPixels, &match.ThumbW, &match.ThumbH); err != nil { return nil, err } if match.Paths, err = getImagePaths(match.SHA1); err != nil { return nil, err } match.PixelsRatio = float32(matchPixels) / float32(pixels) result = append(result, match) } return result, rows.Err() } func handleAPISimilar(w http.ResponseWriter, r *http.Request) { var params struct { SHA1 string } if err := json.NewDecoder(r.Body).Decode(¶ms); err != nil { http.Error(w, err.Error(), http.StatusBadRequest) return } var result struct { Info webSimilarImage `json:"info"` Groups map[string][]webSimilarImage `json:"groups"` } result.Info = webSimilarImage{SHA1: params.SHA1, PixelsRatio: 1} if paths, err := getImagePaths(params.SHA1); err != nil { http.Error(w, err.Error(), http.StatusInternalServerError) return } else { result.Info.Paths = paths } var width, height int64 err := db.QueryRow(`SELECT width, height, thumbw, thumbh FROM image WHERE sha1 = ?`, params.SHA1).Scan(&width, &height, &result.Info.ThumbW, &result.Info.ThumbH) if err != nil { http.Error(w, err.Error(), http.StatusInternalServerError) return } result.Groups = make(map[string][]webSimilarImage) for distance := 0; distance <= 1; distance++ { result.Groups[fmt.Sprintf("Perceptual distance %d", distance)], err = getSimilar(params.SHA1, width*height, distance) if err != nil { http.Error(w, err.Error(), http.StatusInternalServerError) return } } if err := json.NewEncoder(w).Encode(result); err != nil { log.Println(err) } } // - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - // cmdRun runs a web UI against GD on ADDRESS. func cmdRun(args []string) error { if len(args) != 2 { return errors.New("usage: GD ADDRESS") } if err := openDB(args[0]); err != nil { return err } address := args[1] // This separation is not strictly necessary, // but having an elementary level of security doesn't hurt either. staticHandler = http.FileServer(http.Dir("public")) http.HandleFunc("/", handleRequest) http.HandleFunc("/image/", handleImages) http.HandleFunc("/thumb/", handleThumbs) http.HandleFunc("/api/browse", handleAPIBrowse) http.HandleFunc("/api/info", handleAPIInfo) http.HandleFunc("/api/similar", handleAPISimilar) host, port, err := net.SplitHostPort(address) if err != nil { log.Println(err) } else if host == "" { log.Println("http://" + net.JoinHostPort("localhost", port)) } else { log.Println("http://" + address) } s := &http.Server{ Addr: address, ReadTimeout: 60 * time.Second, WriteTimeout: 60 * time.Second, MaxHeaderBytes: 32 << 10, } return s.ListenAndServe() } // --- Import ------------------------------------------------------------------ func idForPath(tx *sql.Tx, path []string, create bool) (int64, error) { var parent sql.NullInt64 for _, name := range path { if err := tx.QueryRow( `SELECT id FROM directory WHERE name = ? AND parent IS ?`, name, parent).Scan(&parent); err == nil { continue } else if !errors.Is(err, sql.ErrNoRows) { return 0, err } else if !create { return 0, err } if result, err := tx.Exec( `INSERT INTO directory(name, parent) VALUES (?, ?)`, name, parent); err != nil { return 0, err } else if id, err := result.LastInsertId(); err != nil { return 0, err } else { parent = sql.NullInt64{Int64: id, Valid: true} } } return parent.Int64, nil } func decodeWebPath(path string) []string { // Relative paths could be handled differently, // but right now, they're assumed to start at the root. result := []string{} for _, crumb := range strings.Split(path, "/") { if crumb != "" { result = append(result, crumb) } } return result } // - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - type directoryManager struct { cache map[string]int64 // Unix-style paths to directory.id } func (dm *directoryManager) IDForDirectoryPath( tx *sql.Tx, path string) (int64, error) { path = filepath.ToSlash(filepath.Clean(path)) list := decodeWebPath(path) if len(list) == 0 { return 0, nil } if dm.cache == nil { dm.cache = make(map[string]int64) } else if id, ok := dm.cache[path]; ok { return id, nil } id, err := idForPath(tx, list, true) if err != nil { return 0, err } dm.cache[path] = id return id, nil } func isImage(path string) (bool, error) { out, err := exec.Command("xdg-mime", "query", "filetype", path).Output() if err != nil { return false, err } return bytes.HasPrefix(out, []byte("image/")), nil } func pingImage(path string) (int, int, error) { out, err := exec.Command("identify", "-limit", "thread", "1", "-ping", "-format", "%w %h", path+"[0]").Output() if err != nil { return 0, 0, err } var w, h int _, err = fmt.Fscanf(bytes.NewReader(out), "%d %d", &w, &h) return w, h, err } type importer struct { dm directoryManager dmMutex sync.Mutex } func (i *importer) Import(path string) error { // The input may be a relative path, and we want to remember it as such, // but symlinks for the images must be absolute. absPath, err := filepath.Abs(path) if err != nil { return err } // Skip videos, which ImageMagick can process, but we don't want it to, // so that they're not converted 1:1 to WebP. pathIsImage, err := isImage(path) if err != nil { return err } if !pathIsImage { return nil } width, height, err := pingImage(path) if err != nil { return err } f, err := os.Open(path) if err != nil { return err } defer f.Close() s, err := f.Stat() if err != nil { return err } hash := sha1.New() _, err = io.CopyBuffer(hash, f, make([]byte, 65536)) if err != nil { return err } hexSHA1 := hex.EncodeToString(hash.Sum(nil)) pathImage := imagePath(hexSHA1) imageDirname, _ := filepath.Split(pathImage) if err := os.MkdirAll(imageDirname, 0755); err != nil { return err } if err := os.Symlink(absPath, pathImage); err != nil && !errors.Is(err, fs.ErrExist) { return err } // The directoryManager isn't thread-safe. // This lock also simulates a timeout-less BEGIN EXCLUSIVE. i.dmMutex.Lock() defer i.dmMutex.Unlock() tx, err := db.Begin() if err != nil { return err } defer tx.Rollback() if _, err = tx.Exec(`INSERT INTO image(sha1, width, height) VALUES (?, ?, ?) ON CONFLICT(sha1) DO NOTHING`, hexSHA1, width, height); err != nil { return err } // XXX: The directoryManager's cache is questionable here, // if only because it keeps entries even when transactions fail. dbDirname, dbBasename := filepath.Split(path) dbParent, err := i.dm.IDForDirectoryPath(tx, dbDirname) if err != nil { return err } // FIXME: This disallows any entries directly in the root. _, err = tx.Exec(`INSERT INTO entry(parent, name, mtime, sha1) VALUES (?, ?, ?, ?) ON CONFLICT DO UPDATE SET mtime = ?, sha1 = ?`, dbParent, dbBasename, s.ModTime().Unix(), hexSHA1, s.ModTime().Unix(), hexSHA1) if err != nil { return err } return tx.Commit() } // cmdImport adds files to the "entry" table. // TODO: Consider making this copy rather than symlink images. func cmdImport(args []string) error { if len(args) < 1 { return errors.New("usage: GD ROOT...") } if err := openDB(args[0]); err != nil { return err } // Make the first step collecting all the paths, // in order to show more useful progress information. paths := []string{} cb := func(path string, d fs.DirEntry, err error) error { if err != nil || d.IsDir() { return err } paths = append(paths, path) return nil } for _, name := range args[1:] { if err := filepath.WalkDir(name, cb); err != nil { return err } } pb := newProgressBar(len(paths)) defer pb.Stop() i := importer{} ctx, cancel := context.WithCancelCause(context.Background()) wg := sync.WaitGroup{} for _, path := range paths { if taskSemaphore.Acquire(ctx, 1) != nil { break } wg.Add(1) go func(path string) { defer taskSemaphore.Release(1) defer wg.Done() if err := i.Import(path); err != nil { cancel(err) } else { pb.Step() } }(path) } wg.Wait() if ctx.Err() != nil { return context.Cause(ctx) } return nil } // cmdSync ensures the given (sub)roots are accurately reflected // in the database. func cmdSync(args []string) error { if len(args) < 1 { return errors.New("usage: GD ROOT...") } if err := openDB(args[0]); err != nil { return err } // TODO: Probably make this run in a transaction, // if only to get exclusivity. return nil } // --- Tagging ----------------------------------------------------------------- // cmdTag mass imports tags from data passed on stdin as a TSV // of SHA1 TAG WEIGHT entries. func cmdTag(args []string) error { if len(args) < 2 || len(args) > 3 { return errors.New("usage: GD SPACE [DESCRIPTION]") } if err := openDB(args[0]); err != nil { return err } space := args[1] var description sql.NullString if len(args) >= 3 { description = sql.NullString{String: args[2], Valid: true} } // Note that starting as a write transaction prevents deadlocks. // Imports are rare, and just bulk load data, so this scope is fine. tx, err := db.Begin() if err != nil { return err } defer tx.Rollback() if _, err := tx.Exec(`INSERT OR IGNORE INTO tag_space(name, description) VALUES (?, ?)`, space, description); err != nil { return err } var spaceID int64 if err := tx.QueryRow(`SELECT id FROM tag_space WHERE name = ?`, space).Scan(&spaceID); err != nil { return err } // XXX: It might make sense to pre-erase all tag assignments within // the given space for that image, the first time we see it: // // DELETE FROM tag_assignment // WHERE sha1 = ? AND tag IN (SELECT id FROM tag WHERE space = ?) // // or even just clear the tag space completely: // // DELETE FROM tag_assignment // WHERE tag IN (SELECT id FROM tag WHERE space = ?); // DELETE FROM tag WHERE space = ?; stmt, err := tx.Prepare(`INSERT INTO tag_assignment(sha1, tag, weight) VALUES (?, (SELECT id FROM tag WHERE space = ? AND name = ?), ?) ON CONFLICT DO UPDATE SET weight = ?`) if err != nil { return err } scanner := bufio.NewScanner(os.Stdin) for scanner.Scan() { fields := strings.Split(scanner.Text(), "\t") if len(fields) != 3 { return errors.New("invalid input format") } sha1, tag := fields[0], fields[1] weight, err := strconv.ParseFloat(fields[2], 64) if err != nil { return err } if _, err := tx.Exec( `INSERT OR IGNORE INTO tag(space, name) VALUES (?, ?);`, spaceID, tag); err != nil { return nil } if _, err := stmt.Exec(sha1, spaceID, tag, weight, weight); err != nil { return err } } if err := scanner.Err(); err != nil { return err } return tx.Commit() } // --- Check ------------------------------------------------------------------- func isValidSHA1(hash string) bool { if len(hash) != sha1.Size*2 || strings.ToLower(hash) != hash { return false } if _, err := hex.DecodeString(hash); err != nil { return false } return true } func hashesToFileListing(root, suffix string, hashes []string) []string { // Note that we're semi-duplicating {image,thumb}Path(). paths := []string{root} for _, hash := range hashes { dir := filepath.Join(root, hash[:2]) paths = append(paths, dir, filepath.Join(dir, hash+suffix)) } slices.Sort(paths) return slices.Compact(paths) } func collectFileListing(root string) (paths []string, err error) { err = filepath.WalkDir(root, func(path string, d fs.DirEntry, err error) error { paths = append(paths, path) return err }) // Even though it should already be sorted somehow. slices.Sort(paths) return } func checkFiles(root, suffix string, hashes []string) (bool, []string, error) { db := hashesToFileListing(root, suffix, hashes) fs, err := collectFileListing(root) if err != nil { return false, nil, err } iDB, iFS, ok, intersection := 0, 0, true, []string{} for iDB < len(db) && iFS < len(fs) { if db[iDB] == fs[iFS] { intersection = append(intersection, db[iDB]) iDB++ iFS++ } else if db[iDB] < fs[iFS] { ok = false fmt.Printf("only in DB: %s\n", db[iDB]) iDB++ } else { ok = false fmt.Printf("only in FS: %s\n", fs[iFS]) iFS++ } } for _, path := range db[iDB:] { ok = false fmt.Printf("only in DB: %s\n", path) } for _, path := range fs[iFS:] { ok = false fmt.Printf("only in FS: %s\n", path) } return ok, intersection, nil } // cmdCheck carries out various database consistency checks. func cmdCheck(args []string) error { if len(args) != 1 { return errors.New("usage: GD") } if err := openDB(args[0]); err != nil { return err } // Check if hashes are in the right format. log.Println("checking image hashes") allSHA1, err := dbCollectStrings(`SELECT sha1 FROM image`) if err != nil { return err } ok := true for _, hash := range allSHA1 { if !isValidSHA1(hash) { ok = false fmt.Printf("invalid image SHA1: %s\n", hash) } } // This is, rather obviously, just a strict subset. // Although it doesn't run in the same transaction. thumbSHA1, err := dbCollectStrings(`SELECT sha1 FROM image WHERE thumbw IS NOT NULL OR thumbh IS NOT NULL`) if err != nil { return err } // This somewhat duplicates {image,thumb}Path(). log.Println("checking SQL against filesystem") okImages, intersection, err := checkFiles( filepath.Join(galleryDirectory, "images"), "", allSHA1) if err != nil { return err } okThumbs, _, err := checkFiles( filepath.Join(galleryDirectory, "thumbs"), ".webp", thumbSHA1) if err != nil { return err } if !okImages || !okThumbs { ok = false } // NOTE: We could also compare mtime, and on mismatch the current SHA1, // though that's more of a "sync" job. log.Println("checking for dead symlinks") for _, path := range intersection { if _, err := os.Stat(path); err != nil { ok = false fmt.Printf("%s: %s\n", path, err) } } if !ok { return errors.New("detected inconsistencies") } return nil } // --- Thumbnailing ------------------------------------------------------------ func makeThumbnail(pathImage, pathThumb string) (int, int, error) { thumbDirname, _ := filepath.Split(pathThumb) if err := os.MkdirAll(thumbDirname, 0755); err != nil { return 0, 0, err } // Create a normalized thumbnail. Since we don't particularly need // any complex processing, such as surrounding of metadata, // simply push it through ImageMagick. // // - http://www.ericbrasseur.org/gamma.html // - https://www.imagemagick.org/Usage/thumbnails/ // - https://imagemagick.org/script/command-line-options.php#layers // // "info:" output is written for each frame, which is why we delete // all of them but the first one beforehands. // // TODO: See if we can optimize resulting WebP animations. // (Do -layers optimize* apply to this format at all?) cmd := exec.Command("convert", "-limit", "thread", "1", pathImage, "-coalesce", "-colorspace", "RGB", "-auto-orient", "-strip", "-resize", "256x128>", "-colorspace", "sRGB", "-format", "%w %h", "+write", pathThumb, "-delete", "1--1", "info:") out, err := cmd.Output() if err != nil { return 0, 0, err } var w, h int _, err = fmt.Fscanf(bytes.NewReader(out), "%d %d", &w, &h) return w, h, err } func makeThumbnailFor(sha1 string) error { pathImage := imagePath(sha1) pathThumb := thumbPath(sha1) w, h, err := makeThumbnail(pathImage, pathThumb) if err != nil { return err } _, err = db.Exec(`UPDATE image SET thumbw = ?, thumbh = ? WHERE sha1 = ?`, w, h, sha1) return err } // cmdThumbnail generates missing thumbnails, in parallel. func cmdThumbnail(args []string) error { if len(args) < 1 { return errors.New("usage: GD [SHA1...]") } if err := openDB(args[0]); err != nil { return err } hexSHA1 := args[1:] if len(hexSHA1) == 0 { // Get all unique images in the database with no thumbnail. var err error hexSHA1, err = dbCollectStrings(`SELECT sha1 FROM image WHERE thumbw IS NULL OR thumbh IS NULL`) if err != nil { return err } } pb := newProgressBar(len(hexSHA1)) defer pb.Stop() ctx, cancel := context.WithCancelCause(context.Background()) wg := sync.WaitGroup{} for _, sha1 := range hexSHA1 { if taskSemaphore.Acquire(ctx, 1) != nil { break } wg.Add(1) go func(sha1 string) { defer taskSemaphore.Release(1) defer wg.Done() if err := makeThumbnailFor(sha1); err != nil { cancel(err) } else { pb.Step() } }(sha1) } wg.Wait() if ctx.Err() != nil { return context.Cause(ctx) } return nil } // --- Perceptual hash --------------------------------------------------------- func makeDhash(hasher, pathThumb string) (uint64, error) { out, err := exec.Command(hasher, pathThumb).Output() if err != nil { return 0, err } var hash uint64 _, err = fmt.Fscanf(bytes.NewReader(out), "%x", &hash) return hash, err } // cmdDhash generates perceptual hash from thumbnails. func cmdDhash(args []string) error { if len(args) < 1 { return errors.New("usage: GD HASHER [SHA1...]") } if err := openDB(args[0]); err != nil { return err } hasher, hexSHA1 := args[1], args[2:] if len(hexSHA1) == 0 { var err error hexSHA1, err = dbCollectStrings(` SELECT sha1 FROM image WHERE dhash IS NULL`) if err != nil { return err } } pb := newProgressBar(len(hexSHA1)) defer pb.Stop() // TODO: Also run the hasher in parallel, once it becomes a problem. // And/or run it in batches, since start-up time of the hasher // poses considerable overhead with large amounts of images. for _, sha1 := range hexSHA1 { pathThumb := thumbPath(sha1) hash, err := makeDhash(hasher, pathThumb) if err != nil { return err } _, err = db.Exec(`UPDATE image SET dhash = ? WHERE sha1 = ?`, int64(hash), sha1) if err != nil { return err } pb.Step() } return nil } // --- Main -------------------------------------------------------------------- var commands = map[string]struct { handler func(args []string) error }{ "init": {cmdInit}, "run": {cmdRun}, "import": {cmdImport}, "tag": {cmdTag}, "sync": {cmdSync}, "check": {cmdCheck}, "thumbnail": {cmdThumbnail}, "dhash": {cmdDhash}, } func main() { if len(os.Args) <= 2 { log.Fatalln("Missing arguments") } cmd, ok := commands[os.Args[1]] if !ok { log.Fatalln("Unknown command: " + os.Args[1]) } taskSemaphore = semaphore.NewWeighted(int64(runtime.NumCPU())) err := cmd.handler(os.Args[2:]) // Note that the database object has a closing finalizer, // we just additionally print any errors coming from there. if db != nil { if err := db.Close(); err != nil { log.Println(err) } } if err != nil { log.Fatalln(err) } }