summary refs log tree commit diff
diff options
context:
space:
mode:
author Přemysl Eric Janouch <p@janouch.name> 2023-12-22 23:46:27 +0100
committer Přemysl Eric Janouch <p@janouch.name> 2023-12-22 23:46:27 +0100
commit 42a57b3271575fd323068bf8b9108d00f0b4a5b3 (patch)
tree 0b5e3e25d68d1d49e1f533cb3a7e1141796b1d42
parent 0b5d388af213680d0def2c03d3c8814c3e2ceaa2 (diff)
download gallery-42a57b3271575fd323068bf8b9108d00f0b4a5b3.tar.gz
gallery-42a57b3271575fd323068bf8b9108d00f0b4a5b3.tar.xz
gallery-42a57b3271575fd323068bf8b9108d00f0b4a5b3.zip
WIP: Global duplicate search
-rw-r--r-- main.go 124
-rw-r--r-- public/gallery.js 57
-rw-r--r-- public/style.css 3
3 files changed, 180 insertions, 4 deletions
diff --git a/main.go b/main.go
index d1fdbb9..4f574cc 100644
--- a/main.go
+++ b/main.go
@@ -446,7 +446,8 @@ func getSimilar(sha1 string, pixels int64, distance int) (
//
// If there's a dhash, there should also be thumbnail dimensions,
// so not bothering with IFNULL on them.
- rows, err := db.Query(`SELECT sha1, width * height, thumbw, thumbh
+ rows, err := db.Query(`
+ SELECT sha1, width * height, IFNULL(thumbw, 0), IFNULL(thumbh, 0)
FROM image
WHERE hamming(dhash, (SELECT dhash FROM image WHERE sha1 = ?)) = ?
AND sha1 <> ?`, sha1, distance, sha1)
@@ -497,7 +498,8 @@ func handleAPISimilar(w http.ResponseWriter, r *http.Request) {
}
var width, height int64
- err := db.QueryRow(`SELECT width, height, thumbw, thumbh
+ err := db.QueryRow(`
+ SELECT width, height, IFNULL(thumbw, 0), IFNULL(thumbh, 0)
FROM image WHERE sha1 = ?`, params.SHA1).Scan(&width, &height,
&result.Info.ThumbW, &result.Info.ThumbH)
if err != nil {
@@ -522,6 +524,123 @@ func handleAPISimilar(w http.ResponseWriter, r *http.Request) {
// - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
+// webDuplicateImage is the JSON representation of one image
+// in the duplicates listing.
+type webDuplicateImage struct {
+ // SHA1 is taken from the sha1 column of the image table.
+ SHA1 string `json:"sha1"`
+ // ThumbW and ThumbH are the thumbnail dimensions;
+ // the queries filling them in substitute 0 for NULL.
+ ThumbW int64 `json:"thumbW"`
+ ThumbH int64 `json:"thumbH"`
+ // Occurences is the number of node rows that reference the image.
+ // The spelling is part of the JSON interface read by gallery.js.
+ Occurences int64 `json:"occurences"`
+}
+
+// webDuplicateGroup is the JSON representation of one row
+// in the duplicates listing.
+type webDuplicateGroup struct {
+ // Main is the image that the group has been built around.
+ Main webDuplicateImage `json:"main"`
+ // Similar lists other images whose dhash is equal to that of Main.
+ Similar []webDuplicateImage `json:"similar"`
+}
+
+// getDuplicateSimilar executes the prepared stmt with dhash and sha1
+// as its two parameters, in this order, and converts each returned row
+// to a webDuplicateImage.
+//
+// When the statement yields no rows, the result is an empty non-nil slice.
+func getDuplicateSimilar(stmt *sql.Stmt, sha1 string, dhash int64) (
+	result []webDuplicateImage, err error) {
+	rows, err := stmt.Query(dhash, sha1)
+	if err != nil {
+		return nil, err
+	}
+	defer rows.Close()
+
+	result = make([]webDuplicateImage, 0)
+	for rows.Next() {
+		var row webDuplicateImage
+		err = rows.Scan(
+			&row.SHA1, &row.ThumbW, &row.ThumbH, &row.Occurences)
+		if err != nil {
+			return nil, err
+		}
+		result = append(result, row)
+	}
+
+	// Iteration may have been cut short by an error.
+	return result, rows.Err()
+}
+
+// duplicatesCTE is a WITH clause meant to be prepended to a SELECT statement.
+// It defines the following common table expressions:
+//   - multiplied: sha1 values that appear in more than one row of node,
+//     along with the number of those rows,
+//   - similarized: images whose dhash equals the dhash of at least one image
+//     with a different sha1, along with the number of such matches,
+//   - duplicates: the union of sha1 values from both of the above.
+//
+// A hamming distance of zero (direct dhash match) will be more than sufficient.
+const duplicatesCTE = `WITH
+ multiplied(sha1, count) AS (
+ SELECT sha1, COUNT(*) AS count FROM node
+ GROUP BY sha1 HAVING count > 1
+ ),
+ similarized(sha1, count) AS (
+ SELECT i1.sha1, COUNT(*) AS count FROM image AS i1
+ JOIN image AS i2 ON i1.dhash = i2.dhash AND i1.sha1 <> i2.sha1
+ GROUP BY i1.sha1
+ ),
+ duplicates(sha1) AS (
+ SELECT sha1 FROM multiplied
+ UNION
+ SELECT sha1 FROM similarized
+ )`
+
+// getDuplicates returns one group for each image selected by duplicatesCTE,
+// i.e., each image that is referenced by more than one node, or that shares
+// its dhash with a different image. A group consists of that image,
+// and of all other images with exactly the same dhash.
+//
+// On success, the result as well as each group's Similar field are non-nil,
+// so that they get encoded as JSON arrays rather than null.
+func getDuplicates() (result []webDuplicateGroup, err error) {
+	stmt, err := db.Prepare(`
+ SELECT i.sha1, IFNULL(i.thumbw, 0), IFNULL(i.thumbh, 0),
+ COUNT(*) AS count
+ FROM image AS i
+ JOIN node AS n ON n.sha1 = i.sha1
+ WHERE i.dhash = ? AND i.sha1 <> ?
+ GROUP BY n.sha1`)
+	if err != nil {
+		return nil, err
+	}
+	// A prepared statement holds on to database resources until closed.
+	defer stmt.Close()
+
+	// FIXME: Never duplicate images.
+	rows, err := db.Query(duplicatesCTE + `
+ SELECT i.sha1, IFNULL(i.thumbw, 0), IFNULL(i.thumbh, 0),
+ i.dhash, COUNT(*) AS count
+ FROM image AS i
+ JOIN duplicates AS d ON d.sha1 = i.sha1
+ JOIN node AS n ON n.sha1 = i.sha1
+ GROUP BY n.sha1`)
+	if err != nil {
+		return nil, err
+	}
+	defer rows.Close()
+
+	result = []webDuplicateGroup{}
+	for rows.Next() {
+		var (
+			image webDuplicateImage
+			dhash sql.NullInt64
+		)
+		if err = rows.Scan(&image.SHA1, &image.ThumbW, &image.ThumbH,
+			&dhash, &image.Occurences); err != nil {
+			return nil, err
+		}
+
+		// NOTE(review): dhash is presumably nullable, seeing as the thumbnail
+		// dimensions that accompany it are. An image that merely occurs
+		// in several nodes may then lack it, in which case nothing can
+		// match it, and scanning into a plain int64 would fail the request.
+		similar := []webDuplicateImage{}
+		if dhash.Valid {
+			if similar, err = getDuplicateSimilar(
+				stmt, image.SHA1, dhash.Int64); err != nil {
+				return nil, err
+			}
+		}
+		result = append(result, webDuplicateGroup{
+			Main:    image,
+			Similar: similar,
+		})
+	}
+	return result, rows.Err()
+}
+
+// handleAPIDuplicates is the HTTP handler registered for /api/duplicates.
+// It responds with the result of getDuplicates encoded as JSON.
+//
+// A request body that fails to decode results in 400 Bad Request,
+// a failure to collect duplicates results in 500 Internal Server Error.
+func handleAPIDuplicates(w http.ResponseWriter, r *http.Request) {
+	// The call takes no parameters so far, yet the body still has to decode.
+	var params struct{}
+	err := json.NewDecoder(r.Body).Decode(&params)
+	if err != nil {
+		http.Error(w, err.Error(), http.StatusBadRequest)
+		return
+	}
+
+	result, err := getDuplicates()
+	if err != nil {
+		http.Error(w, err.Error(), http.StatusInternalServerError)
+		return
+	}
+
+	// It is too late to report encoding failures to the client.
+	if err = json.NewEncoder(w).Encode(result); err != nil {
+		log.Println(err)
+	}
+}
+
+// - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
+
// cmdRun runs a web UI against GD on ADDRESS.
func cmdRun(args []string) error {
if len(args) != 2 {
@@ -543,6 +662,7 @@ func cmdRun(args []string) error {
http.HandleFunc("/api/browse", handleAPIBrowse)
http.HandleFunc("/api/info", handleAPIInfo)
http.HandleFunc("/api/similar", handleAPISimilar)
+ http.HandleFunc("/api/duplicates", handleAPIDuplicates)
host, port, err := net.SplitHostPort(address)
if err != nil {
diff --git a/public/gallery.js b/public/gallery.js
index ee11858..009eb1e 100644
--- a/public/gallery.js
+++ b/public/gallery.js
@@ -213,7 +213,7 @@ let View = {
m(m.route.Link, {
href: `/similar/:key`,
params: {key: ViewModel.sha1},
- }, "Similar")
+ }, "Similar"),
]),
m('.body', {}, [view, m(ViewBar)]),
])
@@ -302,7 +302,7 @@ let Similar = {
m(m.route.Link, {
href: `/view/:key`,
params: {key: SimilarModel.sha1},
- }, "View")
+ }, "View"),
]),
m('.body', {}, m(SimilarList)),
])
@@ -311,6 +311,58 @@ let Similar = {
// - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
+// Holds the state of the duplicates view.
+let DuplicatesModel = {
+ // Groups as last returned by the 'duplicates' API call.
+ entries: [],
+
+ // Replaces entries with a fresh result of the 'duplicates' API call.
+ // Uses `this`, so it needs to be invoked as DuplicatesModel.reload().
+ async reload() {
+ this.entries = await call('duplicates', {})
+ },
+}
+
+// Renders the thumbnail of the image passed in the `info` attribute
+// as a link to its /similar/ page, followed by its occurence count.
+let DuplicatesThumbnail = {
+	view(vnode) {
+		const {sha1, thumbW, thumbH, occurences} = vnode.attrs.info
+		const image = m('img', {
+			src: `/thumb/${sha1}`,
+			width: thumbW,
+			height: thumbH,
+		})
+		return [
+			m(m.route.Link, {href: `/similar/${sha1}`}, image),
+			occurences,
+		]
+	},
+}
+
+// Renders all groups held by DuplicatesModel, one row per group:
+// the main image comes first, the images similar to it follow.
+let DuplicatesList = {
+	view(vnode) {
+		const groups = DuplicatesModel.entries
+		if (groups.length == 0)
+			return "No duplicates"
+
+		const thumbnail = info => m(DuplicatesThumbnail, {info: info})
+		const rows = groups.map(group => m('.row', [
+			thumbnail(group.main),
+			group.similar.map(image => thumbnail(image)),
+		]))
+		return m('.duplicates', {}, rows)
+	},
+}
+
+// The component routed at "/duplicates".
+let Duplicates = {
+ // Starts loading the list of duplicates; the returned promise
+ // is not awaited, so the first view() may still see an empty model.
+ oninit(vnode) {
+ DuplicatesModel.reload()
+ },
+
+ // Lays out a header with the page title above the list of duplicates.
+ view(vnode) {
+ return m('.container', {}, [
+ m('.header', {}, [
+ "Duplicates",
+ ]),
+ m('.body', {}, m(DuplicatesList)),
+ ])
+ },
+}
+
+// - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
+
window.addEventListener('load', () => {
m.route(document.body, "/browse/", {
// The path doesn't need to be escaped, perhaps change that (":key...").
@@ -318,6 +370,7 @@ window.addEventListener('load', () => {
"/browse/:key": Browse,
"/view/:key": View,
"/similar/:key": Similar,
+ "/duplicates": Duplicates,
"/tags": undefined,
"/tags/:space": undefined,
diff --git a/public/style.css b/public/style.css
index d6c2e3f..d18735d 100644
--- a/public/style.css
+++ b/public/style.css
@@ -54,3 +54,6 @@ ul.sidebar li.child a {
.similar h2 { margin: 1em 0 0.5em 0; padding: 0; font-size: 1.2rem; }
.similar .row { display: flex; }
.similar .row ul { margin: 0; padding: 0 0 0 1.25em; list-style-type: "- "; }
+
+/* The duplicates list: a scrollable container with one flex row per group. */
+.duplicates { padding: .5rem; flex-grow: 1; overflow: auto; }
+.duplicates .row { display: flex; }