From 864412c73505514da8bcff200e5ff3004b41c44c Mon Sep 17 00:00:00 2001
From: Slick Daddy
Date: Sun, 3 May 2026 20:36:13 +0300
Subject: [PATCH] feat(sqlite): dynamically build FindDuplicates query

Refactors the FindDuplicates implementation to use the internal
qb.makeQuery tool instead of static raw SQL. This enables the duplicate
checker to utilize the provided SceneFilterType, natively supporting
advanced filtering like path exclusions.
---
 pkg/sqlite/scene.go | 62 ++++++++++++++++++++++++++++++++++++++++++---
 1 file changed, 59 insertions(+), 3 deletions(-)

diff --git a/pkg/sqlite/scene.go b/pkg/sqlite/scene.go
index c2093431d..a700e9da3 100644
--- a/pkg/sqlite/scene.go
+++ b/pkg/sqlite/scene.go
@@ -1430,11 +1430,58 @@ func (qb *SceneStore) GetStashIDs(ctx context.Context, sceneID int) ([]models.St
 	return sceneRepository.stashIDs.get(ctx, sceneID)
 }
 
-func (qb *SceneStore) FindDuplicates(ctx context.Context, distance int, durationDiff float64) ([][]*models.Scene, error) {
+func (qb *SceneStore) FindDuplicates(ctx context.Context, distance int, durationDiff float64, filter *models.SceneFilterType) ([][]*models.Scene, error) {
 	var dupeIds [][]int
+
+	query, err := qb.makeQuery(ctx, filter, nil)
+	if err != nil {
+		return nil, err
+	}
+
+	// Add necessary joins for duplicate checking
+	query.addJoins(
+		join{
+			table:    scenesFilesTable,
+			onClause: "scenes.id = scenes_files.scene_id",
+		},
+		join{
+			table:    fileTable,
+			onClause: "scenes_files.file_id = files.id",
+		},
+		join{
+			table:    fingerprintTable,
+			onClause: "scenes_files.file_id = files_fingerprints.file_id AND files_fingerprints.type = 'phash'",
+		},
+		join{
+			table:    videoFileTable,
+			onClause: "files.id = video_files.file_id",
+		},
+	)
 	if distance == 0 {
+		query.columns = []string{
+			"scenes.id as scene_id",
+			"video_files.duration as file_duration",
+			"files.size as file_size",
+			"files_fingerprints.fingerprint as phash",
+			"abs(max(video_files.duration) OVER (PARTITION by files_fingerprints.fingerprint) - video_files.duration) as durationDiff",
+		}
+
+		sqlStr := query.toSQL(false)
+
+		finalQuery := `
+SELECT GROUP_CONCAT(DISTINCT scene_id) as ids
+FROM (` + sqlStr + `)
+WHERE durationDiff <= ?
+  OR ? < 0
+GROUP BY phash
+HAVING COUNT(phash) > 1
+  AND COUNT(DISTINCT scene_id) > 1
+ORDER BY SUM(file_size) DESC;
+`
+
 		var ids []string
-		if err := dbWrapper.Select(ctx, &ids, findExactDuplicateQuery, durationDiff); err != nil {
+		args := append(query.allArgs(), durationDiff, durationDiff)
+		if err := dbWrapper.Select(ctx, &ids, finalQuery, args...); err != nil {
 			return nil, err
 		}
 
@@ -1452,9 +1499,18 @@ func (qb *SceneStore) FindDuplicates(ctx context.Context, duration
 			}
 		}
 	} else {
+		query.columns = []string{
+			"scenes.id as id",
+			"files_fingerprints.fingerprint as phash",
+			"video_files.duration as duration",
+		}
+		query.sortAndPagination = " ORDER BY files.size DESC"
+
+		sqlStr := query.toSQL(true)
+
 		var hashes []*utils.Phash
 
-		if err := sceneRepository.queryFunc(ctx, findAllPhashesQuery, nil, false, func(rows *sqlx.Rows) error {
+		if err := sceneRepository.queryFunc(ctx, sqlStr, query.allArgs(), false, func(rows *sqlx.Rows) error {
 			phash := utils.Phash{
 				Bucket:   -1,
 				Duration: -1,