From d7473f4b38226db8937f1d93fccd5e49997fe9fc Mon Sep 17 00:00:00 2001 From: WithoutPants <53250216+WithoutPants@users.noreply.github.com> Date: Thu, 3 Mar 2022 09:38:37 +1100 Subject: [PATCH] Distance match phashes on bulk stash-box query (#2355) --- pkg/scraper/stashbox/stash_box.go | 37 ++++++++++++++++++- .../components/Changelog/versions/v0130.md | 1 + 2 files changed, 36 insertions(+), 2 deletions(-) diff --git a/pkg/scraper/stashbox/stash_box.go b/pkg/scraper/stashbox/stash_box.go index ff4d9e101..0d761a6e9 100644 --- a/pkg/scraper/stashbox/stash_box.go +++ b/pkg/scraper/stashbox/stash_box.go @@ -14,6 +14,7 @@ import ( "github.com/Yamashou/gqlgenc/client" "github.com/Yamashou/gqlgenc/graphqljson" + "github.com/corona10/goimagehash" "github.com/stashapp/stash/pkg/logger" "github.com/stashapp/stash/pkg/match" @@ -71,6 +72,18 @@ func (c Client) QueryStashBoxScene(ctx context.Context, queryStr string) ([]*mod return ret, nil } +func phashMatches(hash, other int64) bool { + // Reports whether the goimagehash distance between hash and other is at most stashBoxDistance. + // HACK - stash-box match distance is configurable. This needs to be fixed on the stash-box end. + const stashBoxDistance = 4 + + imageHash := goimagehash.NewImageHash(uint64(hash), goimagehash.PHash) // both values are wrapped as PHash-kind hashes; the int64 -> uint64 conversion keeps the 64-bit pattern + otherHash := goimagehash.NewImageHash(uint64(other), goimagehash.PHash) + + distance, _ := imageHash.Distance(otherHash) // NOTE(review): the error from Distance is discarded, so whatever distance it returns on failure is still compared below - confirm a failure cannot yield a false match + return distance <= stashBoxDistance +} + // FindStashBoxScenesByFingerprints queries stash-box for scenes using every // scene's MD5/OSHASH checksum, or PHash, and returns results in the same order // as the input slice. 
@@ -83,6 +96,7 @@ func (c Client) FindStashBoxScenesByFingerprints(ctx context.Context, sceneIDs [ var fingerprints []*graphql.FingerprintQueryInput // map fingerprints to their scene index fpToScene := make(map[string][]int) + phashToScene := make(map[int64][]int) if err := c.txnManager.WithReadTxn(ctx, func(r models.ReaderRepository) error { qb := r.Scene() @@ -120,6 +134,7 @@ func (c Client) FindStashBoxScenesByFingerprints(ctx context.Context, sceneIDs [ Algorithm: graphql.FingerprintAlgorithmPhash, }) fpToScene[phashStr] = append(fpToScene[phashStr], index) + phashToScene[scene.Phash.Int64] = append(phashToScene[scene.Phash.Int64], index) } } @@ -137,8 +152,8 @@ func (c Client) FindStashBoxScenesByFingerprints(ctx context.Context, sceneIDs [ ret := make([][]*models.ScrapedScene, len(sceneIDs)) for _, s := range allScenes { var addedTo []int - for _, fp := range s.Fingerprints { - sceneIndexes := fpToScene[fp.Hash] + + addScene := func(sceneIndexes []int) { for _, index := range sceneIndexes { if !utils.IntInclude(addedTo, index) { addedTo = append(addedTo, index) @@ -146,6 +161,24 @@ func (c Client) FindStashBoxScenesByFingerprints(ctx context.Context, sceneIDs [ } } } + + for _, fp := range s.Fingerprints { + addScene(fpToScene[fp.Hash]) + + // HACK - we really need stash-box to return specific hash-to-result sets + if fp.Algorithm == graphql.FingerprintAlgorithmPhash.String() { + hash, err := utils.StringToPhash(fp.Hash) + if err != nil { + continue + } + + for phash, sceneIndexes := range phashToScene { + if phashMatches(hash, phash) { + addScene(sceneIndexes) + } + } + } + } } return ret, nil diff --git a/ui/v2.5/src/components/Changelog/versions/v0130.md b/ui/v2.5/src/components/Changelog/versions/v0130.md index f87947c3f..9320e0e49 100644 --- a/ui/v2.5/src/components/Changelog/versions/v0130.md +++ b/ui/v2.5/src/components/Changelog/versions/v0130.md @@ -18,6 +18,7 @@ * Show counts on list tabs in Performer, Studio and Tag pages. 
([#2169](https://github.com/stashapp/stash/pull/2169)) ### 🐛 Bug fixes +* Fix Scrape All button not returning phash distance-matched results from stash-box. ([#2355](https://github.com/stashapp/stash/pull/2355)) * Fix performer checksum not being updated when name updated via batch stash-box tag. ([#2345](https://github.com/stashapp/stash/pull/2345)) * Fix studios/performers/tags with unicode characters not being auto-tagged. ([#2336](https://github.com/stashapp/stash/pull/2336)) * Preview Generation now uses defaults defined in System settings unless overridden in the Generate options. ([#2328](https://github.com/stashapp/stash/pull/2328))