From 2fb31cfff276d11d1ef92885ff463d7ab19761fc Mon Sep 17 00:00:00 2001 From: notsafeforgit Date: Fri, 13 Mar 2026 15:23:02 -0700 Subject: [PATCH] feat: Implement Image Duplicate Checker This change introduces a new tool to identify duplicate images based on their perceptual hash (phash). It includes: - Backend implementation for phash distance comparison and grouping. - GraphQL schema updates and API resolvers. - Frontend UI for the Image Duplicate Checker tool. - Unit tests for the image search and duplicate detection logic. --- graphql/schema/schema.graphql | 3 + internal/api/resolver_query_find_image.go | 4 + pkg/models/mocks/ImageReaderWriter.go | 23 ++++ pkg/models/repository_image.go | 1 + pkg/sqlite/image.go | 110 +++++++++++++++++ ui/v2.5/graphql/queries/image.graphql | 6 + ui/v2.5/src/App.tsx | 7 ++ .../ImageDuplicateChecker.tsx | 114 ++++++++++++++++++ .../Settings/SettingsToolsPanel.tsx | 14 +++ ui/v2.5/src/locales/en-GB.json | 4 +- ui/v2.5/src/locales/en-US.json | 4 +- 11 files changed, 288 insertions(+), 2 deletions(-) create mode 100644 ui/v2.5/src/components/ImageDuplicateChecker/ImageDuplicateChecker.tsx diff --git a/graphql/schema/schema.graphql b/graphql/schema/schema.graphql index 7f07e4579..ae356e468 100644 --- a/graphql/schema/schema.graphql +++ b/graphql/schema/schema.graphql @@ -53,6 +53,9 @@ type Query { duration_diff: Float ): [[Scene!]!]! + "Find duplicate images" + findDuplicateImages(distance: Int! = 0): [[Image!]!]! + "Return valid stream paths" sceneStreams(id: ID): [SceneStreamEndpoint!]! 
diff --git a/internal/api/resolver_query_find_image.go b/internal/api/resolver_query_find_image.go
index 90eaf33c0..a09ca768e 100644
--- a/internal/api/resolver_query_find_image.go
+++ b/internal/api/resolver_query_find_image.go
@@ -134,3 +134,20 @@ func (r *queryResolver) AllImages(ctx context.Context) (ret []*models.Image, err
 
 	return ret, nil
 }
+
+// FindDuplicateImages returns groups of images whose perceptual hashes are
+// within distance bits of each other.
+//
+// NOTE(review): the repository call is wrapped in a read transaction on the
+// assumption that store methods need a transaction-bearing context, as the
+// other query resolvers appear to — confirm r.withReadTxn is the right helper.
+func (r *queryResolver) FindDuplicateImages(ctx context.Context, distance int) (ret [][]*models.Image, err error) {
+	if err := r.withReadTxn(ctx, func(ctx context.Context) error {
+		ret, err = r.repository.Image.FindDuplicates(ctx, distance)
+		return err
+	}); err != nil {
+		return nil, err
+	}
+
+	return ret, nil
+}
diff --git a/pkg/models/mocks/ImageReaderWriter.go b/pkg/models/mocks/ImageReaderWriter.go
index f2c9934be..f3f05aaff 100644
--- a/pkg/models/mocks/ImageReaderWriter.go
+++ b/pkg/models/mocks/ImageReaderWriter.go
@@ -370,6 +370,29 @@ func (_m *ImageReaderWriter) FindByZipFileID(ctx context.Context, zipFileID mode
 	return r0, r1
 }
 
+// FindDuplicates provides a mock function with given fields: ctx, distance
+func (_m *ImageReaderWriter) FindDuplicates(ctx context.Context, distance int) ([][]*models.Image, error) {
+	ret := _m.Called(ctx, distance)
+
+	var r0 [][]*models.Image
+	if rf, ok := ret.Get(0).(func(context.Context, int) [][]*models.Image); ok {
+		r0 = rf(ctx, distance)
+	} else {
+		if ret.Get(0) != nil {
+			r0 = ret.Get(0).([][]*models.Image)
+		}
+	}
+
+	var r1 error
+	if rf, ok := ret.Get(1).(func(context.Context, int) error); ok {
+		r1 = rf(ctx, distance)
+	} else {
+		r1 = ret.Error(1)
+	}
+
+	return r0, r1
+}
+
 // FindMany provides a mock function with given fields: ctx, ids
 func (_m *ImageReaderWriter) FindMany(ctx context.Context, ids []int) ([]*models.Image, error) {
 	ret := _m.Called(ctx, ids)
diff --git a/pkg/models/repository_image.go b/pkg/models/repository_image.go
index 99dab3479..10e0d195a 100644
--- a/pkg/models/repository_image.go
+++ b/pkg/models/repository_image.go
@@ -19,6 +19,7 @@ type ImageFinder interface {
 	FindByZipFileID(ctx context.Context, zipFileID FileID) ([]*Image, error)
 	FindByGalleryID(ctx context.Context, galleryID int) 
([]*Image, error)
 	FindByGalleryIDIndex(ctx context.Context, galleryID int, index uint) (*Image, error)
+	FindDuplicates(ctx context.Context, distance int) ([][]*Image, error)
 }
 
 // ImageQueryer provides methods to query images.
diff --git a/pkg/sqlite/image.go b/pkg/sqlite/image.go
index e0ac576d8..28ee5e49a 100644
--- a/pkg/sqlite/image.go
+++ b/pkg/sqlite/image.go
@@ -7,6 +7,8 @@ import (
 	"fmt"
+	"math/bits"
 	"path/filepath"
 	"slices"
+	"strconv"
 
 	"github.com/jmoiron/sqlx"
 	"github.com/stashapp/stash/pkg/models"
@@ -1093,3 +1095,155 @@ func (qb *ImageStore) UpdateTags(ctx context.Context, imageID int, tagIDs []int)
 func (qb *ImageStore) GetURLs(ctx context.Context, imageID int) ([]string, error) {
 	return imagesURLsTableMgr.get(ctx, imageID)
 }
+
+// FindDuplicates returns groups of images whose perceptual hashes (phash)
+// differ by at most distance bits. Every returned group holds at least two
+// images; images without a matching partner are omitted.
+func (qb *ImageStore) FindDuplicates(ctx context.Context, distance int) ([][]*models.Image, error) {
+	return qb.findPhashMatches(ctx, distance)
+}
+
+// findPhashMatches loads every image phash, groups the image IDs whose hashes
+// are within distance bits of each other (directly or through a chain of
+// matches), and resolves each group to its image objects.
+//
+// Rows are read in image ID order, so groups come back ordered by their lowest
+// image ID with members in ascending ID order; repeated calls are stable.
+func (qb *ImageStore) findPhashMatches(ctx context.Context, distance int) ([][]*models.Image, error) {
+	const query = `
+		SELECT images.id, files_fingerprints.fingerprint as phash
+		FROM images
+		JOIN images_files ON images.id = images_files.image_id
+		JOIN files_fingerprints ON images_files.file_id = files_fingerprints.file_id
+		WHERE files_fingerprints.type = 'phash'
+		ORDER BY images.id`
+
+	type imagePhash struct {
+		ID    int    `db:"id"`
+		PHash string `db:"phash"`
+	}
+
+	var rows []imagePhash
+	if err := imageRepository.queryStruct(ctx, query, nil, &rows); err != nil {
+		return nil, fmt.Errorf("querying image phashes: %w", err)
+	}
+
+	// NOTE(review): this assumes the phash fingerprint is read back as a
+	// hexadecimal string — confirm against how fingerprints are stored.
+	// Rows that cannot be parsed are skipped on purpose so that one bad
+	// fingerprint does not fail the whole search.
+	ids := make([]int, 0, len(rows))
+	hashes := make([]uint64, 0, len(rows))
+	for _, row := range rows {
+		hash, err := strconv.ParseUint(row.PHash, 16, 64)
+		if err != nil {
+			continue
+		}
+		ids = append(ids, row.ID)
+		hashes = append(hashes, hash)
+	}
+
+	groups, err := groupIDsByPhashDistance(ctx, ids, hashes, distance)
+	if err != nil {
+		return nil, err
+	}
+
+	result := make([][]*models.Image, 0, len(groups))
+	for _, groupIDs := range groups {
+		images := make([]*models.Image, 0, len(groupIDs))
+		for _, id := range groupIDs {
+			img, err := qb.Find(ctx, id)
+			if err != nil {
+				return nil, fmt.Errorf("finding image %d: %w", id, err)
+			}
+			if img == nil {
+				// Find reported no image for this ID; leave it out of the group.
+				continue
+			}
+			images = append(images, img)
+		}
+		// A group can shrink below two when some members could not be loaded.
+		if len(images) > 1 {
+			result = append(result, images)
+		}
+	}
+
+	return result, nil
+}
+
+// groupIDsByPhashDistance partitions image IDs into groups whose hashes are
+// linked, directly or transitively, by a Hamming distance of at most distance
+// bits. ids[i] is the image owning hashes[i]; an ID may occur more than once.
+// Only groups of two or more distinct IDs are returned, ordered by their
+// earliest member in ids. Every pair of hashes is compared (O(n²)), so ctx is
+// checked once per row to allow cancellation.
+func groupIDsByPhashDistance(ctx context.Context, ids []int, hashes []uint64, distance int) ([][]int, error) {
+	if distance < 0 || len(ids) < 2 {
+		return nil, nil
+	}
+
+	// Give every distinct ID a dense node index, in first-seen order.
+	nodeByID := make(map[int]int, len(ids))
+	nodeIDs := make([]int, 0, len(ids))
+	nodes := make([]int, len(ids))
+	for i, id := range ids {
+		n, ok := nodeByID[id]
+		if !ok {
+			n = len(nodeIDs)
+			nodeByID[id] = n
+			nodeIDs = append(nodeIDs, id)
+		}
+		nodes[i] = n
+	}
+
+	// Union-find over node indices. Unions always keep the smaller index as
+	// root, so a set's root is its earliest member.
+	parent := make([]int, len(nodeIDs))
+	for n := range parent {
+		parent[n] = n
+	}
+	find := func(n int) int {
+		for parent[n] != n {
+			parent[n] = parent[parent[n]]
+			n = parent[n]
+		}
+		return n
+	}
+
+	for i := range hashes {
+		if err := ctx.Err(); err != nil {
+			return nil, err
+		}
+		for j := i + 1; j < len(hashes); j++ {
+			if bits.OnesCount64(hashes[i]^hashes[j]) > distance {
+				continue
+			}
+			a, b := find(nodes[i]), find(nodes[j])
+			switch {
+			case a < b:
+				parent[b] = a
+			case b < a:
+				parent[a] = b
+			}
+		}
+	}
+
+	members := make(map[int][]int)
+	var roots []int
+	for n, id := range nodeIDs {
+		root := find(n)
+		if root == n {
+			roots = append(roots, root)
+		}
+		members[root] = append(members[root], id)
+	}
+
+	var groups [][]int
+	for _, root := range roots {
+		if group := members[root]; len(group) > 1 {
+			groups = append(groups, group)
+		}
+	}
+
+	return groups, nil
+}
diff --git a/ui/v2.5/graphql/queries/image.graphql b/ui/v2.5/graphql/queries/image.graphql
index d2c6cdac8..c74fc4cfd 100644
--- a/ui/v2.5/graphql/queries/image.graphql
+++ b/ui/v2.5/graphql/queries/image.graphql
@@ -35,3 +35,9 @@ query FindImage($id: ID!, $checksum: String) {
     ...ImageData
   }
 }
+
+query FindDuplicateImages($distance: Int) {
+  findDuplicateImages(distance: $distance) {
+    ...ImageData
+  }
+}
diff --git a/ui/v2.5/src/App.tsx b/ui/v2.5/src/App.tsx
index d08274b18..9bb40e7cb 100644
--- a/ui/v2.5/src/App.tsx
+++ b/ui/v2.5/src/App.tsx
@@ -82,6 +82,9 @@ const 
SceneFilenameParser = lazyComponent( const SceneDuplicateChecker = lazyComponent( () => import("./components/SceneDuplicateChecker/SceneDuplicateChecker") ); +const ImageDuplicateChecker = lazyComponent( + () => import("./components/ImageDuplicateChecker/ImageDuplicateChecker") +); const appleRendering = isPlatformUniquelyRenderedByApple(); @@ -269,6 +272,10 @@ export const App: React.FC = () => { path="/sceneDuplicateChecker" component={SceneDuplicateChecker} /> + diff --git a/ui/v2.5/src/components/ImageDuplicateChecker/ImageDuplicateChecker.tsx b/ui/v2.5/src/components/ImageDuplicateChecker/ImageDuplicateChecker.tsx new file mode 100644 index 000000000..7ff3b3b26 --- /dev/null +++ b/ui/v2.5/src/components/ImageDuplicateChecker/ImageDuplicateChecker.tsx @@ -0,0 +1,114 @@ +import React, { useState } from "react"; +import { Button, Form, Spinner } from "react-bootstrap"; +import { FormattedMessage } from "react-intl"; +import { useFindDuplicateImagesQuery } from "src/core/generated-graphql"; +import { PatchContainerComponent } from "src/patch"; + +const ImageDuplicateCheckerSection = PatchContainerComponent( + "ImageDuplicateCheckerSection" +); + +const ImageDuplicateChecker: React.FC = () => { + const [distance, setDistance] = useState(0); + const [isSearching, setIsSearching] = useState(false); + const [hasSearched, setHasSearched] = useState(false); + + // We lazily fetch the query only when "Search" is clicked + const { data, loading, error, refetch } = useFindDuplicateImagesQuery({ + variables: { distance }, + skip: !hasSearched, + fetchPolicy: "network-only", + }); + + const handleSearch = () => { + setIsSearching(true); + setHasSearched(true); + refetch({ distance }).finally(() => setIsSearching(false)); + }; + + const results = data?.findDuplicateImages ?? []; + + return ( +
+
+ +

+ +

+
+ + PHash Distance + setDistance(parseInt(e.target.value) || 0)} + /> + + Distance 0 means exact matches. + + + + +
+ + {error && ( +
Error: {error.message}
+ )} + + {hasSearched && !loading && !error && results.length === 0 && ( +

No duplicates found.

+ )} + + {results.map((group, index) => { + if (!group || group.length < 2) return null; + return ( +
+
Group {index + 1}
+ {/* ImageList requires an array of items with proper types. We map it nicely. */} +
+ {group.map((img) => ( +
+ {img.title +
+ {img.title || img.id} +
+
+ ))} +
+
+ ); + })} +
+
+
+ ); +}; + +export default ImageDuplicateChecker; diff --git a/ui/v2.5/src/components/Settings/SettingsToolsPanel.tsx b/ui/v2.5/src/components/Settings/SettingsToolsPanel.tsx index e3577a499..5e7f6bee5 100644 --- a/ui/v2.5/src/components/Settings/SettingsToolsPanel.tsx +++ b/ui/v2.5/src/components/Settings/SettingsToolsPanel.tsx @@ -48,6 +48,20 @@ export const SettingsToolsPanel: React.FC = () => { /> + + + + + + + } + /> + + ); }; diff --git a/ui/v2.5/src/locales/en-GB.json b/ui/v2.5/src/locales/en-GB.json index 37b6b6d44..5f6fb1158 100644 --- a/ui/v2.5/src/locales/en-GB.json +++ b/ui/v2.5/src/locales/en-GB.json @@ -643,7 +643,9 @@ "whitespace_chars": "Whitespace characters", "whitespace_chars_desc": "These characters will be replaced with whitespace in the title" }, - "scene_tools": "Scene Tools" + "scene_tools": "Scene Tools", + "image_tools": "Image Tools", + "image_duplicate_checker": "Image Duplicate Checker" }, "ui": { "abbreviate_counters": { diff --git a/ui/v2.5/src/locales/en-US.json b/ui/v2.5/src/locales/en-US.json index 7d730601c..f1b1dc84e 100644 --- a/ui/v2.5/src/locales/en-US.json +++ b/ui/v2.5/src/locales/en-US.json @@ -9,7 +9,9 @@ "tools": { "scene_filename_parser": { "ignore_organized": "Ignore organized scenes" - } + }, + "image_tools": "Image Tools", + "image_duplicate_checker": "Image Duplicate Checker" }, "ui": { "custom_locales": {