diff options
| author | Přemysl Eric Janouch <p@janouch.name> | 2024-01-22 15:06:53 +0100 | 
|---|---|---|
| committer | Přemysl Eric Janouch <p@janouch.name> | 2024-01-22 15:06:53 +0100 | 
| commit | f9f22ba42c1f72540fd28576eb568142da7cd03c (patch) | |
| tree | bdbd63e3f5b838c68cefb4853c4c9c782d339b52 | |
| parent | 7300773b96b5f6324b0b9da624f0ed34de0c1326 (diff) | |
| download | gallery-f9f22ba42c1f72540fd28576eb568142da7cd03c.tar.gz gallery-f9f22ba42c1f72540fd28576eb568142da7cd03c.tar.xz gallery-f9f22ba42c1f72540fd28576eb568142da7cd03c.zip | |
gallery: optimize the related tags query
| -rw-r--r-- | main.go | 82 | 
1 file changed, 57 insertions, 25 deletions
```diff
@@ -965,22 +965,12 @@ const searchCTE = `WITH
 		JOIN image AS i ON i.sha1 = ta.sha1
 		WHERE ta.tag = ?
 	),
-	supertags(tag) AS (
-		SELECT DISTINCT ta.tag
+	supertags(tag, space, name) AS (
+		SELECT DISTINCT ta.tag, ts.name, t.name
 		FROM tag_assignment AS ta
 		JOIN matches AS m ON m.sha1 = ta.sha1
-	),
-	scoredtags(tag, score) AS (
-		-- The cross join is a deliberate optimization,
-		-- and this query may still be really slow.
-		SELECT st.tag, AVG(IFNULL(ta.weight, 0)) AS score
-		FROM matches AS m
-		CROSS JOIN supertags AS st
-		LEFT JOIN tag_assignment AS ta
-		ON ta.sha1 = m.sha1 AND ta.tag = st.tag
-		GROUP BY st.tag
-		-- Using the column alias doesn't fail, but it also doesn't work.
-		HAVING AVG(IFNULL(ta.weight, 0)) >= 0.01
+		JOIN tag AS t ON ta.tag = t.id
+		JOIN tag_space AS ts ON ts.id = t.space
 	)
 `
 
@@ -1012,32 +1002,73 @@ func getTagMatches(tag int64) (matches []webTagMatch, err error) {
 	return matches, rows.Err()
 }
 
+type webTagSupertag struct {
+	space string
+	tag   string
+	score float32
+}
+
+func getTagSupertags(tag int64) (result map[int64]*webTagSupertag, err error) {
+	rows, err := db.Query(searchCTE+`
+		SELECT tag, space, name FROM supertags`, tag)
+	if err != nil {
+		return nil, err
+	}
+	defer rows.Close()
+
+	result = make(map[int64]*webTagSupertag)
+	for rows.Next() {
+		var (
+			tag int64
+			st  webTagSupertag
+		)
+		if err = rows.Scan(&tag, &st.space, &st.tag); err != nil {
+			return nil, err
+		}
+		result[tag] = &st
+	}
+	return result, rows.Err()
+}
+
 type webTagRelated struct {
 	Tag   string  `json:"tag"`
 	Score float32 `json:"score"`
 }
 
-func getTagRelated(tag int64) (result map[string][]webTagRelated, err error) {
+func getTagRelated(tag int64, matches int) (
+	result map[string][]webTagRelated, err error) {
+	// Not sure if this level of efficiency is achievable directly in SQL.
+	supertags, err := getTagSupertags(tag)
+	if err != nil {
+		return nil, err
+	}
+
 	rows, err := db.Query(searchCTE+`
-		SELECT ts.name, t.name, st.score FROM scoredtags AS st
-		JOIN tag AS t ON st.tag = t.id
-		JOIN tag_space AS ts ON ts.id = t.space
-		ORDER BY st.score DESC`, tag)
+		SELECT ta.tag, ta.weight
+		FROM tag_assignment AS ta
+		JOIN matches AS m ON m.sha1 = ta.sha1`, tag)
 	if err != nil {
 		return nil, err
 	}
 	defer rows.Close()
 
-	result = make(map[string][]webTagRelated)
 	for rows.Next() {
 		var (
-			space string
-			r     webTagRelated
+			tag    int64
+			weight float32
 		)
-		if err = rows.Scan(&space, &r.Tag, &r.Score); err != nil {
+		if err = rows.Scan(&tag, &weight); err != nil {
 			return nil, err
 		}
-		result[space] = append(result[space], r)
+		supertags[tag].score += weight
+	}
+
+	result = make(map[string][]webTagRelated)
+	for _, info := range supertags {
+		if score := info.score / float32(matches); score >= 0.1 {
+			r := webTagRelated{Tag: info.tag, Score: score}
+			result[info.space] = append(result[info.space], r)
+		}
 	}
 	return result, rows.Err()
 }
@@ -1075,7 +1106,8 @@ func handleAPISearch(w http.ResponseWriter, r *http.Request) {
 		http.Error(w, err.Error(), http.StatusInternalServerError)
 		return
 	}
-	if result.Related, err = getTagRelated(tagID); err != nil {
+	if result.Related, err = getTagRelated(tagID,
+		len(result.Matches)); err != nil {
 		http.Error(w, err.Error(), http.StatusInternalServerError)
 		return
 	}
```
