diff --git a/pkg/sqlite/scene.go b/pkg/sqlite/scene.go index c2093431d..a700e9da3 100644 --- a/pkg/sqlite/scene.go +++ b/pkg/sqlite/scene.go @@ -1430,11 +1430,58 @@ func (qb *SceneStore) GetStashIDs(ctx context.Context, sceneID int) ([]models.St return sceneRepository.stashIDs.get(ctx, sceneID) } -func (qb *SceneStore) FindDuplicates(ctx context.Context, distance int, durationDiff float64) ([][]*models.Scene, error) { +func (qb *SceneStore) FindDuplicates(ctx context.Context, distance int, durationDiff float64, filter *models.SceneFilterType) ([][]*models.Scene, error) { var dupeIds [][]int + + query, err := qb.makeQuery(ctx, filter, nil) + if err != nil { + return nil, err + } + + // Add necessary joins for duplicate checking + query.addJoins( + join{ + table: scenesFilesTable, + onClause: "scenes.id = scenes_files.scene_id", + }, + join{ + table: fileTable, + onClause: "scenes_files.file_id = files.id", + }, + join{ + table: fingerprintTable, + onClause: "scenes_files.file_id = files_fingerprints.file_id AND files_fingerprints.type = 'phash'", + }, + join{ + table: videoFileTable, + onClause: "files.id = video_files.file_id", + }, + ) + if distance == 0 { + query.columns = []string{ + "scenes.id as scene_id", + "video_files.duration as file_duration", + "files.size as file_size", + "files_fingerprints.fingerprint as phash", + "abs(max(video_files.duration) OVER (PARTITION by files_fingerprints.fingerprint) - video_files.duration) as durationDiff", + } + + sqlStr := query.toSQL(false) + + finalQuery := ` +SELECT GROUP_CONCAT(DISTINCT scene_id) as ids +FROM (` + sqlStr + `) +WHERE durationDiff <= ? + OR ? < 0 +GROUP BY phash +HAVING COUNT(phash) > 1 + AND COUNT(DISTINCT scene_id) > 1 +ORDER BY SUM(file_size) DESC; +` var ids []string - if err := dbWrapper.Select(ctx, &ids, findExactDuplicateQuery, durationDiff); err != nil { + args := append(query.allArgs(), durationDiff, durationDiff) + if err := dbWrapper.Select(ctx, &ids, finalQuery, args...); err != nil { return nil, err } @@ -1452,9 +1499,18 @@ func (qb *SceneStore) FindDuplicates(ctx context.Context, distance int, duration } } } else { + query.columns = []string{ + "scenes.id as id", + "files_fingerprints.fingerprint as phash", + "video_files.duration as duration", + } + query.sortAndPagination = " ORDER BY files.size DESC" + + sqlStr := query.toSQL(true) + var hashes []*utils.Phash - if err := sceneRepository.queryFunc(ctx, findAllPhashesQuery, nil, false, func(rows *sqlx.Rows) error { + if err := sceneRepository.queryFunc(ctx, sqlStr, query.allArgs(), false, func(rows *sqlx.Rows) error { phash := utils.Phash{ Bucket: -1, Duration: -1,