Diffstat (limited to 'main.go')
-rw-r--r--  main.go  115
1 file changed, 70 insertions, 45 deletions
diff --git a/main.go b/main.go
index 4f574cc..ae0a5f7 100644
--- a/main.go
+++ b/main.go
@@ -531,14 +531,24 @@ type webDuplicateImage struct {
 	Occurences int64  `json:"occurences"`
 }
 
-type webDuplicateGroup struct {
-	Main    webDuplicateImage   `json:"main"`
-	Similar []webDuplicateImage `json:"similar"`
-}
-
-func getDuplicateSimilar(stmt *sql.Stmt, sha1 string, dhash int64) (
+// A hamming distance of zero (direct dhash match) will be more than sufficient.
+const duplicatesCTE = `WITH
+	duplicated(dhash, count) AS (
+		SELECT dhash, COUNT(*) AS count FROM image
+		GROUP BY dhash HAVING count > 1
+	),
+	multipathed(sha1, count) AS (
+		SELECT n.sha1, COUNT(*) AS count FROM node AS n
+		JOIN image AS i ON i.sha1 = n.sha1
+		WHERE i.dhash IS NULL
+			OR i.dhash NOT IN (SELECT dhash FROM duplicated)
+		GROUP BY n.sha1 HAVING count > 1
+	)
+`
+
+func getDuplicatesSimilar(stmt *sql.Stmt, dhash int64) (
 	result []webDuplicateImage, err error) {
-	rows, err := stmt.Query(dhash, sha1)
+	rows, err := stmt.Query(dhash)
 	if err != nil {
 		return nil, err
 	}
@@ -556,67 +566,75 @@ func getDuplicateSimilar(stmt *sql.Stmt, sha1 string, dhash int64) (
 	return result, rows.Err()
 }
 
-// A hamming distance of zero (direct dhash match) will be more than sufficient.
-const duplicatesCTE = `WITH
-	multiplied(sha1, count) AS (
-		SELECT sha1, COUNT(*) AS count FROM node
-		GROUP BY sha1 HAVING count > 1
-	),
-	similarized(sha1, count) AS (
-		SELECT i1.sha1, COUNT(*) AS count FROM image AS i1
-		JOIN image AS i2 ON i1.dhash = i2.dhash AND i1.sha1 <> i2.sha1
-		GROUP BY i1.sha1
-	),
-	duplicates(sha1) AS (
-		SELECT sha1 FROM multiplied
-		UNION
-		SELECT sha1 FROM similarized
-	)`
-
-func getDuplicates() (result []webDuplicateGroup, err error) {
+func getDuplicates1(result [][]webDuplicateImage) (
+	[][]webDuplicateImage, error) {
 	stmt, err := db.Prepare(`
 		SELECT i.sha1, IFNULL(i.thumbw, 0), IFNULL(i.thumbh, 0),
-			COUNT(*) AS count
+			COUNT(*) AS occurences
 		FROM image AS i
 		JOIN node AS n ON n.sha1 = i.sha1
-		WHERE i.dhash = ? AND i.sha1 <> ?
+		WHERE i.dhash = ?
 		GROUP BY n.sha1`)
 	if err != nil {
 		return nil, err
 	}
+	defer stmt.Close()
+
+	rows, err := db.Query(duplicatesCTE + `SELECT dhash FROM duplicated`)
+	if err != nil {
+		return nil, err
+	}
+	defer rows.Close()
+
+	for rows.Next() {
+		var (
+			group []webDuplicateImage
+			dhash int64
+		)
+		if err = rows.Scan(&dhash); err != nil {
+			return nil, err
+		}
+		if group, err = getDuplicatesSimilar(stmt, dhash); err != nil {
+			return nil, err
+		}
+		result = append(result, group)
+	}
+	return result, rows.Err()
+}
 
-	// FIXME: Never duplicate images.
-	rows, err := db.Query(duplicatesCTE + `
+func getDuplicates2(result [][]webDuplicateImage) (
+	[][]webDuplicateImage, error) {
+	stmt, err := db.Prepare(`
 		SELECT i.sha1, IFNULL(i.thumbw, 0), IFNULL(i.thumbh, 0),
-			i.dhash, COUNT(*) AS count
+			COUNT(*) AS occurences
 		FROM image AS i
-		JOIN duplicates AS d ON d.sha1 = i.sha1
 		JOIN node AS n ON n.sha1 = i.sha1
+		WHERE i.sha1 = ?
 		GROUP BY n.sha1`)
 	if err != nil {
 		return nil, err
 	}
+	defer stmt.Close()
+
+	rows, err := db.Query(duplicatesCTE + `SELECT sha1 FROM multipathed`)
+	if err != nil {
+		return nil, err
+	}
 	defer rows.Close()
 
-	result = []webDuplicateGroup{}
 	for rows.Next() {
 		var (
-			image   webDuplicateImage
-			similar []webDuplicateImage
-			dhash   int64
+			image webDuplicateImage
+			sha1  string
		)
-		if err = rows.Scan(&image.SHA1, &image.ThumbW, &image.ThumbH,
-			&dhash, &image.Occurences); err != nil {
+		if err = rows.Scan(&sha1); err != nil {
 			return nil, err
 		}
-		if similar, err = getDuplicateSimilar(
-			stmt, image.SHA1, dhash); err != nil {
+		if err := stmt.QueryRow(sha1).Scan(&image.SHA1,
+			&image.ThumbW, &image.ThumbH, &image.Occurences); err != nil {
 			return nil, err
 		}
-		result = append(result, webDuplicateGroup{
-			Main:    image,
-			Similar: similar,
-		})
+		result = append(result, []webDuplicateImage{image})
 	}
 	return result, rows.Err()
 }
@@ -628,8 +646,15 @@ func handleAPIDuplicates(w http.ResponseWriter, r *http.Request) {
 		return
 	}
 
-	result, err := getDuplicates()
-	if err != nil {
+	var (
+		result [][]webDuplicateImage
+		err    error
+	)
+	if result, err = getDuplicates1(result); err != nil {
+		http.Error(w, err.Error(), http.StatusInternalServerError)
+		return
+	}
+	if result, err = getDuplicates2(result); err != nil {
 		http.Error(w, err.Error(), http.StatusInternalServerError)
 		return
 	}
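
For reference, the standalone sketch below exercises the new duplicatesCTE outside the web handler and prints the resulting duplicate groups. It is a hypothetical example rather than part of the patch: the database file name and the mattn/go-sqlite3 driver are assumptions, and the schema is assumed only to contain the columns the queries above reference, image(sha1, dhash, thumbw, thumbh) and node(sha1, ...).

// A minimal sketch, not part of the patch: it runs the two CTE selects
// directly and lists the duplicate groups they describe. File name, driver,
// and schema are assumptions taken from the queries in main.go.
package main

import (
	"database/sql"
	"fmt"
	"log"

	_ "github.com/mattn/go-sqlite3"
)

const duplicatesCTE = `WITH
	duplicated(dhash, count) AS (
		SELECT dhash, COUNT(*) AS count FROM image
		GROUP BY dhash HAVING count > 1
	),
	multipathed(sha1, count) AS (
		SELECT n.sha1, COUNT(*) AS count FROM node AS n
		JOIN image AS i ON i.sha1 = n.sha1
		WHERE i.dhash IS NULL
			OR i.dhash NOT IN (SELECT dhash FROM duplicated)
		GROUP BY n.sha1 HAVING count > 1
	)
`

func main() {
	db, err := sql.Open("sqlite3", "gallery.db") // assumed database path
	if err != nil {
		log.Fatal(err)
	}
	defer db.Close()

	// First pass, mirroring getDuplicates1: dhashes shared by several images.
	// As in the patch, dhash is assumed to be non-NULL for these rows.
	rows, err := db.Query(duplicatesCTE + `SELECT dhash FROM duplicated`)
	if err != nil {
		log.Fatal(err)
	}
	defer rows.Close()
	for rows.Next() {
		var dhash int64
		if err := rows.Scan(&dhash); err != nil {
			log.Fatal(err)
		}
		fmt.Printf("images sharing dhash %d form one group\n", dhash)
	}
	if err := rows.Err(); err != nil {
		log.Fatal(err)
	}

	// Second pass, mirroring getDuplicates2: single images that belong to no
	// dhash group but are referenced by more than one node path.
	rows, err = db.Query(duplicatesCTE + `SELECT sha1 FROM multipathed`)
	if err != nil {
		log.Fatal(err)
	}
	defer rows.Close()
	for rows.Next() {
		var sha1 string
		if err := rows.Scan(&sha1); err != nil {
			log.Fatal(err)
		}
		fmt.Printf("image %s is reachable through multiple paths\n", sha1)
	}
	if err := rows.Err(); err != nil {
		log.Fatal(err)
	}
}

The two passes correspond to getDuplicates1 and getDuplicates2 in the patch: the first yields one group per dhash shared by several distinct images, the second yields single-image groups for files that are unique by dhash yet stored under more than one path.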