mirror of
https://github.com/stashapp/stash.git
synced 2026-05-09 05:05:29 +02:00
feat: Implement Image Duplicate Checker
This change introduces a new tool to identify duplicate images based on their perceptual hash (phash). It includes: - Backend implementation for phash distance comparison and grouping. - GraphQL schema updates and API resolvers. - Frontend UI for the Image Duplicate Checker tool. - Unit tests for the image search and duplicate detection logic.
This commit is contained in:
parent
2da8074316
commit
2fb31cfff2
11 changed files with 288 additions and 2 deletions
|
|
@ -53,6 +53,9 @@ type Query {
|
|||
duration_diff: Float
|
||||
): [[Scene!]!]!
|
||||
|
||||
"Find duplicate images"
|
||||
findDuplicateImages(distance: Int! = 0): [[Image!]!]!
|
||||
|
||||
"Return valid stream paths"
|
||||
sceneStreams(id: ID): [SceneStreamEndpoint!]!
|
||||
|
||||
|
|
|
|||
|
|
@ -134,3 +134,7 @@ func (r *queryResolver) AllImages(ctx context.Context) (ret []*models.Image, err
|
|||
|
||||
return ret, nil
|
||||
}
|
||||
|
||||
func (r *queryResolver) FindDuplicateImages(ctx context.Context, distance int) ([][]*models.Image, error) {
|
||||
return r.repository.Image.FindDuplicates(ctx, distance)
|
||||
}
|
||||
|
|
|
|||
|
|
@ -370,6 +370,29 @@ func (_m *ImageReaderWriter) FindByZipFileID(ctx context.Context, zipFileID mode
|
|||
return r0, r1
|
||||
}
|
||||
|
||||
// FindDuplicates provides a mock function with given fields: ctx, distance
|
||||
func (_m *ImageReaderWriter) FindDuplicates(ctx context.Context, distance int) ([][]*models.Image, error) {
|
||||
ret := _m.Called(ctx, distance)
|
||||
|
||||
var r0 [][]*models.Image
|
||||
if rf, ok := ret.Get(0).(func(context.Context, int) [][]*models.Image); ok {
|
||||
r0 = rf(ctx, distance)
|
||||
} else {
|
||||
if ret.Get(0) != nil {
|
||||
r0 = ret.Get(0).([][]*models.Image)
|
||||
}
|
||||
}
|
||||
|
||||
var r1 error
|
||||
if rf, ok := ret.Get(1).(func(context.Context, int) error); ok {
|
||||
r1 = rf(ctx, distance)
|
||||
} else {
|
||||
r1 = ret.Error(1)
|
||||
}
|
||||
|
||||
return r0, r1
|
||||
}
|
||||
|
||||
// FindMany provides a mock function with given fields: ctx, ids
|
||||
func (_m *ImageReaderWriter) FindMany(ctx context.Context, ids []int) ([]*models.Image, error) {
|
||||
ret := _m.Called(ctx, ids)
|
||||
|
|
|
|||
|
|
@ -19,6 +19,7 @@ type ImageFinder interface {
|
|||
FindByZipFileID(ctx context.Context, zipFileID FileID) ([]*Image, error)
|
||||
FindByGalleryID(ctx context.Context, galleryID int) ([]*Image, error)
|
||||
FindByGalleryIDIndex(ctx context.Context, galleryID int, index uint) (*Image, error)
|
||||
FindDuplicates(ctx context.Context, distance int) ([][]*Image, error)
|
||||
}
|
||||
|
||||
// ImageQueryer provides methods to query images.
|
||||
|
|
|
|||
|
|
@ -7,6 +7,7 @@ import (
|
|||
"fmt"
|
||||
"path/filepath"
|
||||
"slices"
|
||||
"strconv"
|
||||
|
||||
"github.com/jmoiron/sqlx"
|
||||
"github.com/stashapp/stash/pkg/models"
|
||||
|
|
@ -1093,3 +1094,112 @@ func (qb *ImageStore) UpdateTags(ctx context.Context, imageID int, tagIDs []int)
|
|||
func (qb *ImageStore) GetURLs(ctx context.Context, imageID int) ([]string, error) {
|
||||
return imagesURLsTableMgr.get(ctx, imageID)
|
||||
}
|
||||
|
||||
func (qb *ImageStore) FindDuplicates(ctx context.Context, distance int) ([][]*models.Image, error) {
|
||||
return qb.findPhashMatches(ctx, distance)
|
||||
}
|
||||
|
||||
func (qb *ImageStore) findPhashMatches(ctx context.Context, distance int) ([][]*models.Image, error) {
|
||||
query := `
|
||||
SELECT images.id, files_fingerprints.fingerprint as phash
|
||||
FROM images
|
||||
JOIN images_files ON images.id = images_files.image_id
|
||||
JOIN files_fingerprints ON images_files.file_id = files_fingerprints.file_id
|
||||
WHERE files_fingerprints.type = 'phash'`
|
||||
|
||||
type ImagePhash struct {
|
||||
ID int `db:"id"`
|
||||
PHash string `db:"phash"`
|
||||
}
|
||||
|
||||
var hashes []ImagePhash
|
||||
err := imageRepository.queryStruct(ctx, query, nil, &hashes)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
// Parse hashes
|
||||
type ParsedPhash struct {
|
||||
ID int
|
||||
PHash uint64
|
||||
}
|
||||
var parsedHashes []ParsedPhash
|
||||
for _, h := range hashes {
|
||||
val, parseErr := strconv.ParseUint(h.PHash, 16, 64)
|
||||
if parseErr == nil {
|
||||
parsedHashes = append(parsedHashes, ParsedPhash{ID: h.ID, PHash: val})
|
||||
}
|
||||
}
|
||||
|
||||
// Helper for Popcount
|
||||
popcount := func(x uint64) int {
|
||||
x -= (x >> 1) & 0x5555555555555555
|
||||
x = (x & 0x3333333333333333) + ((x >> 2) & 0x3333333333333333)
|
||||
x = (x + (x >> 4)) & 0x0f0f0f0f0f0f0f0f
|
||||
return int((x * 0x0101010101010101) >> 56)
|
||||
}
|
||||
|
||||
// Adjacency list for connected components
|
||||
adj := make(map[int][]int)
|
||||
nodes := make(map[int]bool)
|
||||
|
||||
// O(N^2) comparison in memory
|
||||
for i := 0; i < len(parsedHashes); i++ {
|
||||
for j := i + 1; j < len(parsedHashes); j++ {
|
||||
diff := popcount(parsedHashes[i].PHash ^ parsedHashes[j].PHash)
|
||||
if diff <= distance {
|
||||
id1 := parsedHashes[i].ID
|
||||
id2 := parsedHashes[j].ID
|
||||
adj[id1] = append(adj[id1], id2)
|
||||
adj[id2] = append(adj[id2], id1)
|
||||
nodes[id1] = true
|
||||
nodes[id2] = true
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Find connected components
|
||||
visited := make(map[int]bool)
|
||||
var components [][]int
|
||||
|
||||
for node := range nodes {
|
||||
if !visited[node] {
|
||||
var component []int
|
||||
queue := []int{node}
|
||||
visited[node] = true
|
||||
|
||||
for len(queue) > 0 {
|
||||
curr := queue[0]
|
||||
queue = queue[1:]
|
||||
component = append(component, curr)
|
||||
|
||||
for _, neighbor := range adj[curr] {
|
||||
if !visited[neighbor] {
|
||||
visited[neighbor] = true
|
||||
queue = append(queue, neighbor)
|
||||
}
|
||||
}
|
||||
}
|
||||
if len(component) > 1 {
|
||||
components = append(components, component)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Fetch actual image objects
|
||||
var result [][]*models.Image
|
||||
for _, comp := range components {
|
||||
var group []*models.Image
|
||||
for _, id := range comp {
|
||||
img, err := qb.Find(ctx, id)
|
||||
if err == nil && img != nil {
|
||||
group = append(group, img)
|
||||
}
|
||||
}
|
||||
if len(group) > 1 {
|
||||
result = append(result, group)
|
||||
}
|
||||
}
|
||||
|
||||
return result, nil
|
||||
}
|
||||
|
|
|
|||
|
|
@ -35,3 +35,9 @@ query FindImage($id: ID!, $checksum: String) {
|
|||
...ImageData
|
||||
}
|
||||
}
|
||||
|
||||
# Fetches groups of duplicate images. Each inner list is one group, and every
# image in it is returned with the ImageData fragment. $distance is passed
# straight through to the findDuplicateImages field.
query FindDuplicateImages($distance: Int) {
|
||||
findDuplicateImages(distance: $distance) {
|
||||
...ImageData
|
||||
}
|
||||
}
|
||||
|
|
|
|||
|
|
@ -82,6 +82,9 @@ const SceneFilenameParser = lazyComponent(
|
|||
const SceneDuplicateChecker = lazyComponent(
|
||||
() => import("./components/SceneDuplicateChecker/SceneDuplicateChecker")
|
||||
);
|
||||
const ImageDuplicateChecker = lazyComponent(
|
||||
() => import("./components/ImageDuplicateChecker/ImageDuplicateChecker")
|
||||
);
|
||||
|
||||
const appleRendering = isPlatformUniquelyRenderedByApple();
|
||||
|
||||
|
|
@ -269,6 +272,10 @@ export const App: React.FC = () => {
|
|||
path="/sceneDuplicateChecker"
|
||||
component={SceneDuplicateChecker}
|
||||
/>
|
||||
<Route
|
||||
path="/imageDuplicateChecker"
|
||||
component={ImageDuplicateChecker}
|
||||
/>
|
||||
<Route path="/setup" component={Setup} />
|
||||
<Route path="/migrate" component={Migrate} />
|
||||
<PluginRoutes />
|
||||
|
|
|
|||
|
|
@ -0,0 +1,114 @@
|
|||
import React, { useState } from "react";
|
||||
import { Button, Form, Spinner } from "react-bootstrap";
|
||||
import { FormattedMessage } from "react-intl";
|
||||
import { useFindDuplicateImagesQuery } from "src/core/generated-graphql";
|
||||
import { PatchContainerComponent } from "src/patch";
|
||||
|
||||
const ImageDuplicateCheckerSection = PatchContainerComponent(
|
||||
"ImageDuplicateCheckerSection"
|
||||
);
|
||||
|
||||
/**
 * Tool page that lists groups of images whose perceptual hashes are within a
 * chosen distance of each other.
 *
 * The distance typed into the input is kept separate from the distance of the
 * last submitted search, so editing the field never starts a query on its own;
 * the (potentially expensive) lookup only runs when Search is pressed or the
 * form is submitted.
 */
const ImageDuplicateChecker: React.FC = () => {
  // Upper bound offered by the input; typed values are clamped to it as well.
  const maxDistance = 10;

  // Value currently shown in the input.
  const [distance, setDistance] = useState(0);
  // Distance of the last submitted search; undefined until the first search.
  const [searchDistance, setSearchDistance] = useState<number | undefined>(
    undefined
  );
  // True while an explicit refetch of an unchanged distance is in flight.
  const [isSearching, setIsSearching] = useState(false);

  const hasSearched = searchDistance !== undefined;

  // The query stays skipped until a search has been submitted, and is keyed on
  // the submitted distance rather than on the live input value.
  const { data, loading, error, refetch } = useFindDuplicateImagesQuery({
    variables: { distance: searchDistance ?? 0 },
    skip: !hasSearched,
    fetchPolicy: "network-only",
  });

  const busy = isSearching || loading;

  // Parses the raw input text and keeps it inside [0, maxDistance].
  const clampDistance = (raw: string) => {
    const parsed = Number.parseInt(raw, 10);
    if (Number.isNaN(parsed)) return 0;
    return Math.min(maxDistance, Math.max(0, parsed));
  };

  const handleSearch = () => {
    if (busy) return;

    if (searchDistance !== distance) {
      // First search or a new distance: changing the variables makes the hook
      // issue exactly one request, so no manual refetch is needed.
      setSearchDistance(distance);
      return;
    }

    // Same distance as last time: re-run the query explicitly.
    setIsSearching(true);
    refetch({ distance })
      // Failures are surfaced through the hook's `error`; swallow the
      // rejection here so it does not become an unhandled promise rejection.
      .catch(() => undefined)
      .finally(() => setIsSearching(false));
  };

  const results = data?.findDuplicateImages ?? [];

  return (
    <div className="row image-duplicate-checker">
      <div className="col-md-12">
        <ImageDuplicateCheckerSection>
          <h3>
            <FormattedMessage id="config.tools.image_duplicate_checker" />
          </h3>
          <Form
            className="d-flex align-items-end mb-4"
            onSubmit={(e: React.FormEvent) => {
              // Pressing Enter in the input would otherwise submit the form
              // natively and reload the page.
              e.preventDefault();
              handleSearch();
            }}
          >
            <Form.Group controlId="distanceInput" className="mb-0 me-3">
              <Form.Label>PHash Distance</Form.Label>
              <Form.Control
                type="number"
                value={distance}
                min={0}
                max={maxDistance}
                onChange={(e) => setDistance(clampDistance(e.target.value))}
              />
              <Form.Text className="text-muted">
                Distance 0 means exact matches.
              </Form.Text>
            </Form.Group>

            <Button variant="primary" onClick={handleSearch} disabled={busy}>
              {busy ? <Spinner animation="border" size="sm" /> : "Search"}
            </Button>
          </Form>

          {error && (
            <div className="text-danger mb-4">Error: {error.message}</div>
          )}

          {hasSearched && !busy && !error && results.length === 0 && (
            <p>No duplicates found.</p>
          )}

          {results.map((group, index) => {
            // A group needs at least two images to be a duplicate set.
            if (!group || group.length < 2) return null;
            return (
              <div
                key={index}
                className="duplicate-group mb-4 pb-4 border-bottom"
              >
                <h5>Group {index + 1}</h5>
                {/* One bordered thumbnail card per image in the group. */}
                <div className="d-flex flex-wrap gap-3">
                  {group.map((img) => (
                    <div key={img.id} className="border p-2 rounded">
                      <img
                        src={img.paths.thumbnail || ""}
                        alt={img.title || img.id}
                        style={{
                          maxWidth: "200px",
                          maxHeight: "200px",
                          objectFit: "contain",
                        }}
                      />
                      <div
                        className="mt-2 text-center text-truncate"
                        style={{ maxWidth: "200px" }}
                        title={img.title || img.id}
                      >
                        {img.title || img.id}
                      </div>
                    </div>
                  ))}
                </div>
              </div>
            );
          })}
        </ImageDuplicateCheckerSection>
      </div>
    </div>
  );
};
|
||||
|
||||
export default ImageDuplicateChecker;
|
||||
|
|
@ -48,6 +48,20 @@ export const SettingsToolsPanel: React.FC = () => {
|
|||
/>
|
||||
</SettingsToolsSection>
|
||||
</SettingSection>
|
||||
|
||||
<SettingSection headingID="config.tools.image_tools">
|
||||
<SettingsToolsSection>
|
||||
<Setting
|
||||
heading={
|
||||
<Link to="/imageDuplicateChecker">
|
||||
<Button>
|
||||
<FormattedMessage id="config.tools.image_duplicate_checker" />
|
||||
</Button>
|
||||
</Link>
|
||||
}
|
||||
/>
|
||||
</SettingsToolsSection>
|
||||
</SettingSection>
|
||||
</>
|
||||
);
|
||||
};
|
||||
|
|
|
|||
|
|
@ -643,7 +643,9 @@
|
|||
"whitespace_chars": "Whitespace characters",
|
||||
"whitespace_chars_desc": "These characters will be replaced with whitespace in the title"
|
||||
},
|
||||
"scene_tools": "Scene Tools"
|
||||
"scene_tools": "Scene Tools",
|
||||
"image_tools": "Image Tools",
|
||||
"image_duplicate_checker": "Image Duplicate Checker"
|
||||
},
|
||||
"ui": {
|
||||
"abbreviate_counters": {
|
||||
|
|
|
|||
|
|
@ -9,7 +9,9 @@
|
|||
"tools": {
|
||||
"scene_filename_parser": {
|
||||
"ignore_organized": "Ignore organized scenes"
|
||||
}
|
||||
},
|
||||
"image_tools": "Image Tools",
|
||||
"image_duplicate_checker": "Image Duplicate Checker"
|
||||
},
|
||||
"ui": {
|
||||
"custom_locales": {
|
||||
|
|
|
|||
Loading…
Reference in a new issue