aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorPřemysl Eric Janouch <p@janouch.name>2024-01-22 15:06:53 +0100
committerPřemysl Eric Janouch <p@janouch.name>2024-01-22 15:06:53 +0100
commitf9f22ba42c1f72540fd28576eb568142da7cd03c (patch)
treebdbd63e3f5b838c68cefb4853c4c9c782d339b52
parent7300773b96b5f6324b0b9da624f0ed34de0c1326 (diff)
downloadgallery-f9f22ba42c1f72540fd28576eb568142da7cd03c.tar.gz
gallery-f9f22ba42c1f72540fd28576eb568142da7cd03c.tar.xz
gallery-f9f22ba42c1f72540fd28576eb568142da7cd03c.zip
gallery: optimize the related tags query
-rw-r--r--main.go82
1 files changed, 57 insertions, 25 deletions
diff --git a/main.go b/main.go
index bd54c7a..408b7ea 100644
--- a/main.go
+++ b/main.go
@@ -965,22 +965,12 @@ const searchCTE = `WITH
JOIN image AS i ON i.sha1 = ta.sha1
WHERE ta.tag = ?
),
- supertags(tag) AS (
- SELECT DISTINCT ta.tag
+ supertags(tag, space, name) AS (
+ SELECT DISTINCT ta.tag, ts.name, t.name
FROM tag_assignment AS ta
JOIN matches AS m ON m.sha1 = ta.sha1
- ),
- scoredtags(tag, score) AS (
- -- The cross join is a deliberate optimization,
- -- and this query may still be really slow.
- SELECT st.tag, AVG(IFNULL(ta.weight, 0)) AS score
- FROM matches AS m
- CROSS JOIN supertags AS st
- LEFT JOIN tag_assignment AS ta
- ON ta.sha1 = m.sha1 AND ta.tag = st.tag
- GROUP BY st.tag
- -- Using the column alias doesn't fail, but it also doesn't work.
- HAVING AVG(IFNULL(ta.weight, 0)) >= 0.01
+ JOIN tag AS t ON ta.tag = t.id
+ JOIN tag_space AS ts ON ts.id = t.space
)
`
@@ -1012,32 +1002,73 @@ func getTagMatches(tag int64) (matches []webTagMatch, err error) {
return matches, rows.Err()
}
+type webTagSupertag struct {
+ space string
+ tag string
+ score float32
+}
+
+func getTagSupertags(tag int64) (result map[int64]*webTagSupertag, err error) {
+ rows, err := db.Query(searchCTE+`
+ SELECT tag, space, name FROM supertags`, tag)
+ if err != nil {
+ return nil, err
+ }
+ defer rows.Close()
+
+ result = make(map[int64]*webTagSupertag)
+ for rows.Next() {
+ var (
+ tag int64
+ st webTagSupertag
+ )
+ if err = rows.Scan(&tag, &st.space, &st.tag); err != nil {
+ return nil, err
+ }
+ result[tag] = &st
+ }
+ return result, rows.Err()
+}
+
type webTagRelated struct {
Tag string `json:"tag"`
Score float32 `json:"score"`
}
-func getTagRelated(tag int64) (result map[string][]webTagRelated, err error) {
+func getTagRelated(tag int64, matches int) (
+ result map[string][]webTagRelated, err error) {
+ // Not sure if this level of efficiency is achievable directly in SQL.
+ supertags, err := getTagSupertags(tag)
+ if err != nil {
+ return nil, err
+ }
+
rows, err := db.Query(searchCTE+`
- SELECT ts.name, t.name, st.score FROM scoredtags AS st
- JOIN tag AS t ON st.tag = t.id
- JOIN tag_space AS ts ON ts.id = t.space
- ORDER BY st.score DESC`, tag)
+ SELECT ta.tag, ta.weight
+ FROM tag_assignment AS ta
+ JOIN matches AS m ON m.sha1 = ta.sha1`, tag)
if err != nil {
return nil, err
}
defer rows.Close()
- result = make(map[string][]webTagRelated)
for rows.Next() {
var (
- space string
- r webTagRelated
+ tag int64
+ weight float32
)
- if err = rows.Scan(&space, &r.Tag, &r.Score); err != nil {
+ if err = rows.Scan(&tag, &weight); err != nil {
return nil, err
}
- result[space] = append(result[space], r)
+ supertags[tag].score += weight
+ }
+
+ result = make(map[string][]webTagRelated)
+ for _, info := range supertags {
+ if score := info.score / float32(matches); score >= 0.1 {
+ r := webTagRelated{Tag: info.tag, Score: score}
+ result[info.space] = append(result[info.space], r)
+ }
}
return result, rows.Err()
}
@@ -1075,7 +1106,8 @@ func handleAPISearch(w http.ResponseWriter, r *http.Request) {
http.Error(w, err.Error(), http.StatusInternalServerError)
return
}
- if result.Related, err = getTagRelated(tagID); err != nil {
+ if result.Related, err = getTagRelated(tagID,
+ len(result.Matches)); err != nil {
http.Error(w, err.Error(), http.StatusInternalServerError)
return
}