Distance match phashes on bulk stash-box query (#2355)

This commit is contained in:
WithoutPants 2022-03-03 09:38:37 +11:00 committed by GitHub
parent 40e092ecc5
commit d7473f4b38
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
2 changed files with 36 additions and 2 deletions

View file

@ -14,6 +14,7 @@ import (
"github.com/Yamashou/gqlgenc/client"
"github.com/Yamashou/gqlgenc/graphqljson"
"github.com/corona10/goimagehash"
"github.com/stashapp/stash/pkg/logger"
"github.com/stashapp/stash/pkg/match"
@ -71,6 +72,18 @@ func (c Client) QueryStashBoxScene(ctx context.Context, queryStr string) ([]*mod
return ret, nil
}
// phashMatches reports whether the two perceptual hashes are close enough to
// be treated as matching, i.e. whether the distance between them as computed
// by goimagehash is no greater than stashBoxDistance.
func phashMatches(hash, other int64) bool {
	// HACK - stash-box match distance is configurable, but is hard-coded
	// here. This needs to be fixed on the stash-box end.
	const stashBoxDistance = 4

	imageHash := goimagehash.NewImageHash(uint64(hash), goimagehash.PHash)
	otherHash := goimagehash.NewImageHash(uint64(other), goimagehash.PHash)

	distance, err := imageHash.Distance(otherHash)
	if err != nil {
		// The distance value is not meaningful when an error is returned,
		// so never report a match in that case.
		return false
	}

	return distance <= stashBoxDistance
}
// FindStashBoxScenesByFingerprints queries stash-box for scenes using every
// scene's MD5/OSHASH checksum, or PHash, and returns results in the same order
// as the input slice.
@ -83,6 +96,7 @@ func (c Client) FindStashBoxScenesByFingerprints(ctx context.Context, sceneIDs [
var fingerprints []*graphql.FingerprintQueryInput
// map fingerprints to their scene index
fpToScene := make(map[string][]int)
phashToScene := make(map[int64][]int)
if err := c.txnManager.WithReadTxn(ctx, func(r models.ReaderRepository) error {
qb := r.Scene()
@ -120,6 +134,7 @@ func (c Client) FindStashBoxScenesByFingerprints(ctx context.Context, sceneIDs [
Algorithm: graphql.FingerprintAlgorithmPhash,
})
fpToScene[phashStr] = append(fpToScene[phashStr], index)
phashToScene[scene.Phash.Int64] = append(phashToScene[scene.Phash.Int64], index)
}
}
@ -137,8 +152,8 @@ func (c Client) FindStashBoxScenesByFingerprints(ctx context.Context, sceneIDs [
ret := make([][]*models.ScrapedScene, len(sceneIDs))
for _, s := range allScenes {
var addedTo []int
for _, fp := range s.Fingerprints {
sceneIndexes := fpToScene[fp.Hash]
addScene := func(sceneIndexes []int) {
for _, index := range sceneIndexes {
if !utils.IntInclude(addedTo, index) {
addedTo = append(addedTo, index)
@ -146,6 +161,24 @@ func (c Client) FindStashBoxScenesByFingerprints(ctx context.Context, sceneIDs [
}
}
}
for _, fp := range s.Fingerprints {
addScene(fpToScene[fp.Hash])
// HACK - we really need stash-box to return specific hash-to-result sets
if fp.Algorithm == graphql.FingerprintAlgorithmPhash.String() {
hash, err := utils.StringToPhash(fp.Hash)
if err != nil {
continue
}
for phash, sceneIndexes := range phashToScene {
if phashMatches(hash, phash) {
addScene(sceneIndexes)
}
}
}
}
}
return ret, nil

View file

@ -18,6 +18,7 @@
* Show counts on list tabs in Performer, Studio and Tag pages. ([#2169](https://github.com/stashapp/stash/pull/2169))
### 🐛 Bug fixes
* Fix Scrape All button not returning phash distance-matched results from stash-box. ([#2355](https://github.com/stashapp/stash/pull/2355))
* Fix performer checksum not being updated when the name is updated via batch stash-box tag. ([#2345](https://github.com/stashapp/stash/pull/2345))
* Fix studios/performers/tags with unicode characters not being auto-tagged. ([#2336](https://github.com/stashapp/stash/pull/2336))
* Preview Generation now uses defaults defined in System settings unless overridden in the Generate options. ([#2328](https://github.com/stashapp/stash/pull/2328))