diff options
| -rw-r--r-- | go.mod | 5 | ||||
| -rw-r--r-- | go.sum | 4 | ||||
| -rw-r--r-- | main.go | 203 | ||||
| -rw-r--r-- | public/gallery.js | 4 | 
4 files changed, 183 insertions, 33 deletions
| @@ -2,4 +2,7 @@ module janouch.name/gallery  go 1.21.4 -require github.com/mattn/go-sqlite3 v1.14.18 +require ( +	github.com/mattn/go-sqlite3 v1.14.19 +	golang.org/x/image v0.14.0 +) @@ -1,2 +1,6 @@  github.com/mattn/go-sqlite3 v1.14.18 h1:JL0eqdCOq6DJVNPSvArO/bIV9/P7fbGrV00LZHc+5aI=  github.com/mattn/go-sqlite3 v1.14.18/go.mod h1:2eHXhiwb8IkHr+BDWZGa96P6+rkvnG63S2DGjv9HUNg= +github.com/mattn/go-sqlite3 v1.14.19 h1:fhGleo2h1p8tVChob4I9HpmVFIAkKGpiukdrgQbWfGI= +github.com/mattn/go-sqlite3 v1.14.19/go.mod h1:2eHXhiwb8IkHr+BDWZGa96P6+rkvnG63S2DGjv9HUNg= +golang.org/x/image v0.14.0 h1:tNgSxAFe3jC4uYqvZdTr84SZoM1KfwdC9SKIFrLjFn4= +golang.org/x/image v0.14.0/go.mod h1:HUYqC05R2ZcZ3ejNQsIHQDQiwWM4JBqmm6MKANTp4LE= @@ -11,9 +11,12 @@ import (  	"errors"  	"fmt"  	"html/template" +	"image" +	"image/color"  	"io"  	"io/fs"  	"log" +	"math"  	"math/bits"  	"net"  	"net/http" @@ -31,6 +34,8 @@ import (  	"time"  	"github.com/mattn/go-sqlite3" +	"golang.org/x/image/draw" +	"golang.org/x/image/webp"  )  var ( @@ -54,7 +59,7 @@ func hammingDistance(a, b int64) int {  func init() {  	sql.Register("sqlite3_custom", &sqlite3.SQLiteDriver{  		ConnectHook: func(conn *sqlite3.SQLiteConn) error { -			return conn.RegisterFunc("hamming", hammingDistance, true) +			return conn.RegisterFunc("hamming", hammingDistance, true /*pure*/)  		},  	})  } @@ -439,7 +444,7 @@ type webSimilarImage struct {  	Paths       []string `json:"paths"`  } -func getSimilar(sha1 string, pixels int64, distance int) ( +func getSimilar(sha1 string, dhash int64, pixels int64, distance int) (  	result []webSimilarImage, err error) {  	// For distance ∈ {0, 1}, this query is quite inefficient.  	// In exchange, it's generic. @@ -448,9 +453,8 @@ func getSimilar(sha1 string, pixels int64, distance int) (  	// so not bothering with IFNULL on them.  	rows, err := db.Query(`  		SELECT sha1, width * height, IFNULL(thumbw, 0), IFNULL(thumbh, 0) -		FROM image -		WHERE hamming(dhash, (SELECT dhash FROM image WHERE sha1 = ?)) = ? -		AND sha1 <> ?`, sha1, distance, sha1) +		FROM image WHERE sha1 <> ? AND dhash IS NOT NULL +		AND hamming(dhash, ?) = ?`, sha1, dhash, distance)  	if err != nil {  		return nil, err  	} @@ -475,6 +479,19 @@ func getSimilar(sha1 string, pixels int64, distance int) (  	return result, rows.Err()  } +func getSimilarGroups(sha1 string, dhash int64, pixels int64, +	output map[string][]webSimilarImage) error { +	var err error +	for distance := 0; distance <= 1; distance++ { +		output[fmt.Sprintf("Perceptual distance %d", distance)], err = +			getSimilar(sha1, dhash, pixels, distance) +		if err != nil { +			return err +		} +	} +	return nil +} +  func handleAPISimilar(w http.ResponseWriter, r *http.Request) {  	var params struct {  		SHA1 string @@ -497,10 +514,13 @@ func handleAPISimilar(w http.ResponseWriter, r *http.Request) {  		result.Info.Paths = paths  	} -	var width, height int64 +	var ( +		width, height int64 +		dhash         sql.NullInt64 +	)  	err := db.QueryRow(` -		SELECT width, height, IFNULL(thumbw, 0), IFNULL(thumbh, 0) -		FROM image WHERE sha1 = ?`, params.SHA1).Scan(&width, &height, +		SELECT width, height, dhash, IFNULL(thumbw, 0), IFNULL(thumbh, 0) +		FROM image WHERE sha1 = ?`, params.SHA1).Scan(&width, &height, &dhash,  		&result.Info.ThumbW, &result.Info.ThumbH)  	if err != nil {  		http.Error(w, err.Error(), http.StatusInternalServerError) @@ -508,10 +528,9 @@ func handleAPISimilar(w http.ResponseWriter, r *http.Request) {  	}  	result.Groups = make(map[string][]webSimilarImage) -	for distance := 0; distance <= 1; distance++ { -		result.Groups[fmt.Sprintf("Perceptual distance %d", distance)], err = -			getSimilar(params.SHA1, width*height, distance) -		if err != nil { +	if dhash.Valid { +		if err := getSimilarGroups( +			params.SHA1, dhash.Int64, width*height, result.Groups); err != nil {  			http.Error(w, err.Error(), http.StatusInternalServerError)  			return  		} @@ -535,6 +554,7 @@ type webDuplicateImage struct {  const duplicatesCTE = `WITH  	duplicated(dhash, count) AS (  		SELECT dhash, COUNT(*) AS count FROM image +		WHERE dhash IS NOT NULL  		GROUP BY dhash HAVING count > 1  	),  	multipathed(sha1, count) AS ( @@ -1847,6 +1867,7 @@ func cmdThumbnail(args []string) error {  			defer wg.Done()  			if err := makeThumbnailFor(sha1); err != nil {  				if ee, ok := err.(*exec.ExitError); ok { +					// FIXME: Not in the goroutine, or lock it.  					pb.Stop()  					log.Printf("%s: %s\n", sha1, ee.Stderr)  					pb.Update() @@ -1866,27 +1887,133 @@ func cmdThumbnail(args []string) error {  // --- Perceptual hash --------------------------------------------------------- -func makeDhash(hasher, pathThumb string) (uint64, error) { -	out, err := exec.Command(hasher, pathThumb).Output() +type linearImage struct { +	img image.Image +} + +func newLinearImage(img image.Image) *linearImage { +	return &linearImage{img: img} +} + +func (l *linearImage) ColorModel() color.Model { return l.img.ColorModel() } +func (l *linearImage) Bounds() image.Rectangle { return l.img.Bounds() } + +func unSRGB(c uint32) uint8 { +	n := float64(c) / 0xffff +	if n <= 0.04045 { +		return uint8(n * (255.0 / 12.92)) +	} +	return uint8(math.Pow((n+0.055)/(1.055), 2.4) * 255.0) +} + +func (l *linearImage) At(x, y int) color.Color { +	r, g, b, a := l.img.At(x, y).RGBA() +	return color.RGBA{ +		R: unSRGB(r), G: unSRGB(g), B: unSRGB(b), A: uint8(a >> 8)} +} + +// - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - + +// isWebPAnimation returns whether the given ReadSeeker starts a WebP animation. +// See https://developers.google.com/speed/webp/docs/riff_container +func isWebPAnimation(rs io.ReadSeeker) (bool, error) { +	b := make([]byte, 17) +	if _, err := rs.Read(b); err != nil { +		return false, err +	} +	if _, err := rs.Seek(0, io.SeekStart); err != nil { +		return false, err +	} + +	return bytes.Equal(b[:4], []byte("RIFF")) && +		bytes.Equal(b[8:16], []byte("WEBPVP8X")) && +		b[16]&0b00000010 != 0, nil +} + +var errIsAnimation = errors.New("cannot perceptually hash animations") + +func dhashWebP(rs io.ReadSeeker) (uint64, error) { +	if a, err := isWebPAnimation(rs); err != nil { +		return 0, err +	} else if a { +		return 0, errIsAnimation +	} + +	// Doing this entire thing in Go is SLOW, but convenient. +	source, err := webp.Decode(rs)  	if err != nil {  		return 0, err  	} +	var ( +		linear  = newLinearImage(source) +		resized = image.NewNRGBA64(image.Rect(0, 0, 9, 8)) +	) +	draw.CatmullRom.Scale(resized, resized.Bounds(), +		linear, linear.Bounds(), draw.Src, nil) +  	var hash uint64 -	_, err = fmt.Fscanf(bytes.NewReader(out), "%x", &hash) -	return hash, err +	for y := 0; y < 8; y++ { +		var grey [9]float32 +		for x := 0; x < 9; x++ { +			rgba := resized.NRGBA64At(x, y) +			grey[x] = 0.2126*float32(rgba.R) + +				0.7152*float32(rgba.G) + +				0.0722*float32(rgba.B) +		} + +		var row uint64 +		if grey[0] < grey[1] { +			row |= 1 << 7 +		} +		if grey[1] < grey[2] { +			row |= 1 << 6 +		} +		if grey[2] < grey[3] { +			row |= 1 << 5 +		} +		if grey[3] < grey[4] { +			row |= 1 << 4 +		} +		if grey[4] < grey[5] { +			row |= 1 << 3 +		} +		if grey[5] < grey[6] { +			row |= 1 << 2 +		} +		if grey[6] < grey[7] { +			row |= 1 << 1 +		} +		if grey[7] < grey[8] { +			row |= 1 << 0 +		} +		hash = hash<<8 | row +	} +	return hash, nil +} + +// - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - + +func makeDhash(sha1 string) (uint64, error) { +	pathThumb := thumbPath(sha1) +	f, err := os.Open(pathThumb) +	if err != nil { +		return 0, err +	} +	defer f.Close() +	return dhashWebP(f)  }  // cmdDhash generates perceptual hash from thumbnails.  func cmdDhash(args []string) error {  	if len(args) < 1 { -		return errors.New("usage: GD HASHER [SHA1...]") +		return errors.New("usage: GD [SHA1...]")  	}  	if err := openDB(args[0]); err != nil {  		return err  	} -	hasher, hexSHA1 := args[1], args[2:] +	hexSHA1 := args[1:]  	if len(hexSHA1) == 0 {  		var err error  		hexSHA1, err = dbCollectStrings(` @@ -1899,23 +2026,35 @@ func cmdDhash(args []string) error {  	pb := newProgressBar(len(hexSHA1))  	defer pb.Stop() -	// TODO: Also run the hasher in parallel, once it becomes a problem. -	// And/or run it in batches, since start-up time of the hasher -	// poses considerable overhead with large amounts of images. +	ctx, cancel := context.WithCancelCause(context.Background()) +	wg := sync.WaitGroup{}  	for _, sha1 := range hexSHA1 { -		pathThumb := thumbPath(sha1) -		hash, err := makeDhash(hasher, pathThumb) -		if err != nil { -			return err -		} - -		_, err = db.Exec(`UPDATE image SET dhash = ? WHERE sha1 = ?`, -			int64(hash), sha1) -		if err != nil { -			return err +		if taskSemaphore.acquire(ctx) != nil { +			break  		} -		pb.Step() +		wg.Add(1) +		go func(sha1 string) { +			defer taskSemaphore.release() +			defer wg.Done() +			if hash, err := makeDhash(sha1); errors.Is(err, errIsAnimation) { +				// Ignoring this common condition. +			} else if err != nil { +				// FIXME: Not in the goroutine, or lock it. +				pb.Stop() +				log.Printf("%s: %s\n", sha1, err) +				pb.Update() +			} else if _, err = db.Exec( +				`UPDATE image SET dhash = ? WHERE sha1 = ?`, +				int64(hash), sha1); err != nil { +				cancel(err) +			} +			pb.Step() +		}(sha1) +	} +	wg.Wait() +	if ctx.Err() != nil { +		return context.Cause(ctx)  	}  	return nil  } diff --git a/public/gallery.js b/public/gallery.js index ca1e511..970a3bf 100644 --- a/public/gallery.js +++ b/public/gallery.js @@ -1,6 +1,10 @@  'use strict'  function call(method, params) { +	// TODO: Make it apparent when results result in errors: +	//  - With responseType == "json", m.request() always expects JSON, +	//    and error.message is null if it fails, but we can handle it manually. +	//  - Go can wrap all errors into trivial strings before writing.  	return m.request({  		method: "POST",  		url: `/api/${method}`, | 
