feat(sqlite): dynamically build FindDuplicates query

Refactors the FindDuplicates implementation to build its queries with the internal qb.makeQuery helper instead of the static findExactDuplicateQuery and findAllPhashesQuery queries. FindDuplicates now takes a *models.SceneFilterType argument, so the duplicate checker applies the provided scene filter and natively supports advanced filtering such as path exclusions.
This commit is contained in:
Slick Daddy 2026-05-03 20:36:13 +03:00
parent c7f1d5612f
commit 864412c735

View file

@ -1430,11 +1430,58 @@ func (qb *SceneStore) GetStashIDs(ctx context.Context, sceneID int) ([]models.St
return sceneRepository.stashIDs.get(ctx, sceneID)
}
func (qb *SceneStore) FindDuplicates(ctx context.Context, distance int, durationDiff float64) ([][]*models.Scene, error) {
func (qb *SceneStore) FindDuplicates(ctx context.Context, distance int, durationDiff float64, filter *models.SceneFilterType) ([][]*models.Scene, error) {
var dupeIds [][]int
query, err := qb.makeQuery(ctx, filter, nil)
if err != nil {
return nil, err
}
// Add necessary joins for duplicate checking
query.addJoins(
join{
table: scenesFilesTable,
onClause: "scenes.id = scenes_files.scene_id",
},
join{
table: fileTable,
onClause: "scenes_files.file_id = files.id",
},
join{
table: fingerprintTable,
onClause: "scenes_files.file_id = files_fingerprints.file_id AND files_fingerprints.type = 'phash'",
},
join{
table: videoFileTable,
onClause: "files.id = video_files.file_id",
},
)
if distance == 0 {
query.columns = []string{
"scenes.id as scene_id",
"video_files.duration as file_duration",
"files.size as file_size",
"files_fingerprints.fingerprint as phash",
"abs(max(video_files.duration) OVER (PARTITION by files_fingerprints.fingerprint) - video_files.duration) as durationDiff",
}
sqlStr := query.toSQL(false)
finalQuery := `
SELECT GROUP_CONCAT(DISTINCT scene_id) as ids
FROM (` + sqlStr + `)
WHERE durationDiff <= ?
OR ? < 0
GROUP BY phash
HAVING COUNT(phash) > 1
AND COUNT(DISTINCT scene_id) > 1
ORDER BY SUM(file_size) DESC;
`
var ids []string
if err := dbWrapper.Select(ctx, &ids, findExactDuplicateQuery, durationDiff); err != nil {
args := append(query.allArgs(), durationDiff, durationDiff)
if err := dbWrapper.Select(ctx, &ids, finalQuery, args...); err != nil {
return nil, err
}
@ -1452,9 +1499,18 @@ func (qb *SceneStore) FindDuplicates(ctx context.Context, distance int, duration
}
}
} else {
query.columns = []string{
"scenes.id as id",
"files_fingerprints.fingerprint as phash",
"video_files.duration as duration",
}
query.sortAndPagination = " ORDER BY files.size DESC"
sqlStr := query.toSQL(true)
var hashes []*utils.Phash
if err := sceneRepository.queryFunc(ctx, findAllPhashesQuery, nil, false, func(rows *sqlx.Rows) error {
if err := sceneRepository.queryFunc(ctx, sqlStr, query.allArgs(), false, func(rows *sqlx.Rows) error {
phash := utils.Phash{
Bucket: -1,
Duration: -1,