From 08e768941fc4f423abdd4f2d496470bacf04b4ee Mon Sep 17 00:00:00 2001 From: Přemysl Eric Janouch Date: Sun, 17 Dec 2023 16:30:13 +0100 Subject: Add API function for similar image search --- main.go | 115 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++--- test.sh | 2 ++ 2 files changed, 112 insertions(+), 5 deletions(-) diff --git a/main.go b/main.go index 886e4c7..44214b9 100644 --- a/main.go +++ b/main.go @@ -14,6 +14,7 @@ import ( "io" "io/fs" "log" + "math/bits" "net" "net/http" "os" @@ -26,7 +27,7 @@ import ( "sync" "time" - _ "github.com/mattn/go-sqlite3" + "github.com/mattn/go-sqlite3" "golang.org/x/sync/semaphore" ) @@ -38,9 +39,21 @@ var ( taskSemaphore *semaphore.Weighted ) +func hammingDistance(a, b int64) int { + return bits.OnesCount64(uint64(a) ^ uint64(b)) +} + +func init() { + sql.Register("sqlite3_custom", &sqlite3.SQLiteDriver{ + ConnectHook: func(conn *sqlite3.SQLiteConn) error { + return conn.RegisterFunc("hamming", hammingDistance, true) + }, + }) +} + func openDB(directory string) error { var err error - db, err = sql.Open("sqlite3", "file:"+filepath.Join(directory, + db, err = sql.Open("sqlite3_custom", "file:"+filepath.Join(directory, "gallery.db?_foreign_keys=1&_busy_timeout=1000")) galleryDirectory = directory return err @@ -205,8 +218,8 @@ type webEntry struct { SHA1 string `json:"sha1"` Name string `json:"name"` Modified int64 `json:"modified"` - ThumbW int `json:"thumbW"` - ThumbH int `json:"thumbH"` + ThumbW int64 `json:"thumbW"` + ThumbH int64 `json:"thumbH"` } func getSubentries(tx *sql.Tx, parent int64) (entries []webEntry, err error) { @@ -293,6 +306,7 @@ func getImagePaths(sha1 string) (paths []string, err error) { } defer rows.Close() + paths = []string{} for rows.Next() { var path string if err := rows.Scan(&path); err != nil { @@ -348,7 +362,6 @@ func handleAPIInfo(w http.ResponseWriter, r *http.Request) { Height int64 `json:"height"` Paths []string `json:"paths"` Tags map[string]map[string]float32 `json:"tags"` - 
// TODO: Maybe add perceptual hash collisions. } var err error @@ -374,6 +387,97 @@ func handleAPIInfo(w http.ResponseWriter, r *http.Request) { // - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - +type webSimilarImage struct { + SHA1 string `json:"sha1"` + PixelsRatio float32 `json:"pixelsRatio"` + ThumbW int64 `json:"thumbW"` + ThumbH int64 `json:"thumbH"` + Paths []string `json:"paths"` +} + +func getSimilar(sha1 string, pixels int64, distance int) ( + result []webSimilarImage, err error) { + // For distance ∈ {0, 1}, this query is quite inefficient. + // In exchange, it's generic. + // + // If there's a dhash, there should also be thumbnail dimensions, + // so not bothering with IFNULL on them. + rows, err := db.Query(`SELECT sha1, width * height, thumbw, thumbh + FROM image + WHERE hamming(dhash, (SELECT dhash FROM image WHERE sha1 = ?)) = ? + AND sha1 <> ?`, sha1, distance, sha1) + if err != nil { + return nil, err + } + defer rows.Close() + + result = []webSimilarImage{} + for rows.Next() { + var ( + match webSimilarImage + matchPixels int64 + ) + if err = rows.Scan(&match.SHA1, + &matchPixels, &match.ThumbW, &match.ThumbH); err != nil { + return nil, err + } + if match.Paths, err = getImagePaths(match.SHA1); err != nil { + return nil, err + } + match.PixelsRatio = float32(matchPixels) / float32(pixels) + result = append(result, match) + } + return result, rows.Err() +} + +func handleAPISimilar(w http.ResponseWriter, r *http.Request) { + var params struct { + SHA1 string + } + if err := json.NewDecoder(r.Body).Decode(&params); err != nil { + http.Error(w, err.Error(), http.StatusBadRequest) + return + } + + var result struct { + Info webSimilarImage `json:"info"` + Groups map[string][]webSimilarImage `json:"groups"` + } + + result.Info = webSimilarImage{SHA1: params.SHA1, PixelsRatio: 1} + if paths, err := getImagePaths(params.SHA1); err != nil { + http.Error(w, err.Error(), http.StatusInternalServerError) + return + } else { +
result.Info.Paths = paths + } + + var width, height int64 + err := db.QueryRow(`SELECT width, height, thumbw, thumbh + FROM image WHERE sha1 = ?`, params.SHA1).Scan(&width, &height, + &result.Info.ThumbW, &result.Info.ThumbH) + if err != nil { + http.Error(w, err.Error(), http.StatusInternalServerError) + return + } + + result.Groups = make(map[string][]webSimilarImage) + for distance := 0; distance <= 1; distance++ { + result.Groups[fmt.Sprintf("Perceptual distance %d", distance)], err = + getSimilar(params.SHA1, width*height, distance) + if err != nil { + http.Error(w, err.Error(), http.StatusInternalServerError) + return + } + } + + if err := json.NewEncoder(w).Encode(result); err != nil { + log.Println(err) + } +} + +// - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - + // cmdRun runs a web UI against GD on ADDRESS. func cmdRun(args []string) error { if len(args) != 2 { @@ -394,6 +498,7 @@ func cmdRun(args []string) error { http.HandleFunc("/thumb/", handleThumbs) http.HandleFunc("/api/browse", handleAPIBrowse) http.HandleFunc("/api/info", handleAPIInfo) + http.HandleFunc("/api/similar", handleAPISimilar) host, port, err := net.SplitHostPort(address) if err != nil { diff --git a/test.sh b/test.sh index 920bae9..c51841a 100755 --- a/test.sh +++ b/test.sh @@ -20,6 +20,8 @@ echo '{"path":"/tmp/Gi"}' | \ curl http://localhost:8080/api/browse -X POST --data-binary @- echo '{"sha1":"d53fc82162fd19a6e7b92b401b08b7505dbf3dfd"}' | \ curl http://localhost:8080/api/info -X POST --data-binary @- +echo '{"sha1":"9539d9895ab8c25d76c321b23b8a327801a496bb"}' | \ +curl http://localhost:8080/api/similar -X POST --data-binary @- kill $web wait $web -- cgit v1.2.3-70-g09d2