diff --git a/graphql/schema/schema.graphql b/graphql/schema/schema.graphql
index 7f07e4579..ae356e468 100644
--- a/graphql/schema/schema.graphql
+++ b/graphql/schema/schema.graphql
@@ -53,6 +53,9 @@ type Query {
duration_diff: Float
): [[Scene!]!]!
+ "Find duplicate images"
+ findDuplicateImages(distance: Int! = 0): [[Image!]!]!
+
"Return valid stream paths"
sceneStreams(id: ID): [SceneStreamEndpoint!]!
diff --git a/internal/api/resolver_query_find_image.go b/internal/api/resolver_query_find_image.go
index 90eaf33c0..a09ca768e 100644
--- a/internal/api/resolver_query_find_image.go
+++ b/internal/api/resolver_query_find_image.go
@@ -134,3 +134,7 @@ func (r *queryResolver) AllImages(ctx context.Context) (ret []*models.Image, err
return ret, nil
}
+
+// FindDuplicateImages resolves the findDuplicateImages query by asking the
+// image repository for groups of images that match within distance.
+//
+// NOTE(review): the AllImages resolver above uses named results and ends with
+// `return ret, nil`, which suggests it wraps its repository call in a helper;
+// confirm whether this call needs the same wrapping.
+func (r *queryResolver) FindDuplicateImages(ctx context.Context, distance int) ([][]*models.Image, error) {
+	groups, err := r.repository.Image.FindDuplicates(ctx, distance)
+	return groups, err
+}
diff --git a/pkg/models/mocks/ImageReaderWriter.go b/pkg/models/mocks/ImageReaderWriter.go
index f2c9934be..f3f05aaff 100644
--- a/pkg/models/mocks/ImageReaderWriter.go
+++ b/pkg/models/mocks/ImageReaderWriter.go
@@ -370,6 +370,29 @@ func (_m *ImageReaderWriter) FindByZipFileID(ctx context.Context, zipFileID mode
return r0, r1
}
+// FindDuplicates provides a mock function with given fields: ctx, distance
+func (_m *ImageReaderWriter) FindDuplicates(ctx context.Context, distance int) ([][]*models.Image, error) {
+	args := _m.Called(ctx, distance)
+
+	// The first configured return is either a function to invoke with the
+	// call arguments or a literal value; a nil value leaves groups nil.
+	var groups [][]*models.Image
+	if fn, ok := args.Get(0).(func(context.Context, int) [][]*models.Image); ok {
+		groups = fn(ctx, distance)
+	} else if raw := args.Get(0); raw != nil {
+		groups = raw.([][]*models.Image)
+	}
+
+	// The second configured return follows the same convention for the error.
+	if fn, ok := args.Get(1).(func(context.Context, int) error); ok {
+		return groups, fn(ctx, distance)
+	}
+
+	return groups, args.Error(1)
+}
+
// FindMany provides a mock function with given fields: ctx, ids
func (_m *ImageReaderWriter) FindMany(ctx context.Context, ids []int) ([]*models.Image, error) {
ret := _m.Called(ctx, ids)
diff --git a/pkg/models/repository_image.go b/pkg/models/repository_image.go
index 99dab3479..10e0d195a 100644
--- a/pkg/models/repository_image.go
+++ b/pkg/models/repository_image.go
@@ -19,6 +19,7 @@ type ImageFinder interface {
FindByZipFileID(ctx context.Context, zipFileID FileID) ([]*Image, error)
FindByGalleryID(ctx context.Context, galleryID int) ([]*Image, error)
FindByGalleryIDIndex(ctx context.Context, galleryID int, index uint) (*Image, error)
+ FindDuplicates(ctx context.Context, distance int) ([][]*Image, error)
}
// ImageQueryer provides methods to query images.
diff --git a/pkg/sqlite/image.go b/pkg/sqlite/image.go
index e0ac576d8..28ee5e49a 100644
--- a/pkg/sqlite/image.go
+++ b/pkg/sqlite/image.go
@@ -7,6 +7,7 @@ import (
"fmt"
+ "math/bits"
"path/filepath"
"slices"
+ "strconv"
"github.com/jmoiron/sqlx"
"github.com/stashapp/stash/pkg/models"
@@ -1093,3 +1094,112 @@ func (qb *ImageStore) UpdateTags(ctx context.Context, imageID int, tagIDs []int)
func (qb *ImageStore) GetURLs(ctx context.Context, imageID int) ([]string, error) {
return imagesURLsTableMgr.get(ctx, imageID)
}
+
+// FindDuplicates returns groups of images considered duplicates of one
+// another for the given distance. The matching itself is delegated to
+// findPhashMatches.
+func (qb *ImageStore) FindDuplicates(ctx context.Context, distance int) ([][]*models.Image, error) {
+	groups, err := qb.findPhashMatches(ctx, distance)
+	return groups, err
+}
+
+// imagePhashEntry pairs an image ID with one parsed perceptual hash belonging
+// to one of its files.
+type imagePhashEntry struct {
+	id   int
+	hash uint64
+}
+
+// findPhashMatches returns groups of images whose perceptual hashes are within
+// distance bits (Hamming distance) of each other.
+//
+// Matching is transitive: if A matches B and B matches C, all three end up in
+// the same group even when A and C differ by more than distance bits. Only
+// groups with at least two images are returned. Groups are ordered by their
+// smallest image ID and images within a group by ascending ID, so the result
+// is deterministic for a given database state.
+//
+// The comparison is quadratic in the number of stored phashes; cancellation of
+// ctx is honoured between rows. Any error from the fingerprint query or from
+// loading a matched image is returned rather than silently dropped.
+func (qb *ImageStore) findPhashMatches(ctx context.Context, distance int) ([][]*models.Image, error) {
+	const query = `
+		SELECT images.id, files_fingerprints.fingerprint as phash
+		FROM images
+		JOIN images_files ON images.id = images_files.image_id
+		JOIN files_fingerprints ON images_files.file_id = files_fingerprints.file_id
+		WHERE files_fingerprints.type = 'phash'`
+
+	type phashRow struct {
+		ID    int    `db:"id"`
+		PHash string `db:"phash"`
+	}
+
+	// NOTE(review): a slice is passed to queryStruct here; confirm that helper
+	// scans every row into a slice rather than a single struct.
+	var rows []phashRow
+	if err := imageRepository.queryStruct(ctx, query, nil, &rows); err != nil {
+		return nil, fmt.Errorf("querying image phashes: %w", err)
+	}
+
+	// NOTE(review): this assumes phash fingerprints come back as hexadecimal
+	// strings. If the column stores integers, base-16 parsing misreads them —
+	// confirm against the code that writes the fingerprint.
+	entries := make([]imagePhashEntry, 0, len(rows))
+	for _, row := range rows {
+		hash, err := strconv.ParseUint(row.PHash, 16, 64)
+		if err != nil {
+			// Best effort: a fingerprint that cannot be parsed is left out
+			// of the comparison instead of failing the whole query.
+			continue
+		}
+		entries = append(entries, imagePhashEntry{id: row.ID, hash: hash})
+	}
+
+	idGroups, err := clusterImagePhashes(ctx, entries, distance)
+	if err != nil {
+		return nil, err
+	}
+
+	// Load the image objects for every matched ID.
+	var result [][]*models.Image
+	for _, ids := range idGroups {
+		group := make([]*models.Image, 0, len(ids))
+		for _, id := range ids {
+			img, err := qb.Find(ctx, id)
+			if err != nil {
+				return nil, fmt.Errorf("finding image %d: %w", id, err)
+			}
+			if img == nil {
+				// No image was returned for this ID; skip it.
+				continue
+			}
+			group = append(group, img)
+		}
+		// Skipped images can shrink a group below the duplicate threshold.
+		if len(group) > 1 {
+			result = append(result, group)
+		}
+	}
+
+	return result, nil
+}
+
+// clusterImagePhashes partitions the image IDs in entries into groups that are
+// connected through pairs of hashes differing in at most distance bits.
+//
+// Every returned group has at least two IDs and is sorted ascending; groups
+// are ordered by their smallest ID. It returns ctx.Err() if ctx is cancelled
+// while comparing.
+func clusterImagePhashes(ctx context.Context, entries []imagePhashEntry, distance int) ([][]int, error) {
+	// parent is a disjoint-set forest keyed by image ID. An ID is only
+	// inserted once it takes part in a match, so every set has two or more
+	// members.
+	parent := make(map[int]int)
+
+	find := func(id int) int {
+		root := id
+		for parent[root] != root {
+			root = parent[root]
+		}
+		// Path compression keeps later lookups short.
+		for id != root {
+			next := parent[id]
+			parent[id] = root
+			id = next
+		}
+		return root
+	}
+
+	union := func(a, b int) {
+		for _, id := range [2]int{a, b} {
+			if _, ok := parent[id]; !ok {
+				parent[id] = id
+			}
+		}
+		if ra, rb := find(a), find(b); ra != rb {
+			parent[rb] = ra
+		}
+	}
+
+	for i := range entries {
+		// The pairwise scan is O(N^2); stop promptly if the caller gave up.
+		if err := ctx.Err(); err != nil {
+			return nil, err
+		}
+
+		a := entries[i]
+		for j := i + 1; j < len(entries); j++ {
+			b := entries[j]
+			if a.id == b.id {
+				// Two files of the same image: not a duplicate pair.
+				continue
+			}
+			if bits.OnesCount64(a.hash^b.hash) <= distance {
+				union(a.id, b.id)
+			}
+		}
+	}
+
+	members := make(map[int][]int)
+	for id := range parent {
+		root := find(id)
+		members[root] = append(members[root], id)
+	}
+
+	// Map iteration order is random, so sort for a stable result.
+	groups := make([][]int, 0, len(members))
+	for _, ids := range members {
+		slices.Sort(ids)
+		groups = append(groups, ids)
+	}
+	slices.SortFunc(groups, func(x, y []int) int {
+		return slices.Compare(x, y)
+	})
+
+	return groups, nil
+}
diff --git a/ui/v2.5/graphql/queries/image.graphql b/ui/v2.5/graphql/queries/image.graphql
index d2c6cdac8..c74fc4cfd 100644
--- a/ui/v2.5/graphql/queries/image.graphql
+++ b/ui/v2.5/graphql/queries/image.graphql
@@ -35,3 +35,9 @@ query FindImage($id: ID!, $checksum: String) {
...ImageData
}
}
+
+query FindDuplicateImages($distance: Int) { # $distance is forwarded unchanged to the findDuplicateImages field
+ findDuplicateImages(distance: $distance) { # each selected image uses the ImageData fragment
+ ...ImageData
+ }
+}
diff --git a/ui/v2.5/src/App.tsx b/ui/v2.5/src/App.tsx
index d08274b18..9bb40e7cb 100644
--- a/ui/v2.5/src/App.tsx
+++ b/ui/v2.5/src/App.tsx
@@ -82,6 +82,9 @@ const SceneFilenameParser = lazyComponent(
const SceneDuplicateChecker = lazyComponent(
() => import("./components/SceneDuplicateChecker/SceneDuplicateChecker")
);
+const ImageDuplicateChecker = lazyComponent( // image counterpart of SceneDuplicateChecker above, wrapped with lazyComponent the same way
+ () => import("./components/ImageDuplicateChecker/ImageDuplicateChecker")
+);
const appleRendering = isPlatformUniquelyRenderedByApple();
@@ -269,6 +272,10 @@ export const App: React.FC = () => {
path="/sceneDuplicateChecker"
component={SceneDuplicateChecker}
/>
+
No duplicates found.
+ )} + + {results.map((group, index) => { + if (!group || group.length < 2) return null; + return ( +