mirror of
https://github.com/stashapp/stash.git
synced 2026-05-09 05:05:29 +02:00
feat(sqlite): dynamically build FindDuplicates query
Refactors the FindDuplicates implementation to use the internal qb.makeQuery helper instead of a static raw SQL string. This enables the duplicate checker to utilize the provided SceneFilterType, natively supporting advanced filtering such as path exclusions.
This commit is contained in:
parent
c7f1d5612f
commit
864412c735
1 changed file with 59 additions and 3 deletions
|
|
@ -1430,11 +1430,58 @@ func (qb *SceneStore) GetStashIDs(ctx context.Context, sceneID int) ([]models.St
|
|||
return sceneRepository.stashIDs.get(ctx, sceneID)
|
||||
}
|
||||
|
||||
func (qb *SceneStore) FindDuplicates(ctx context.Context, distance int, durationDiff float64) ([][]*models.Scene, error) {
|
||||
func (qb *SceneStore) FindDuplicates(ctx context.Context, distance int, durationDiff float64, filter *models.SceneFilterType) ([][]*models.Scene, error) {
|
||||
var dupeIds [][]int
|
||||
|
||||
query, err := qb.makeQuery(ctx, filter, nil)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
// Add necessary joins for duplicate checking
|
||||
query.addJoins(
|
||||
join{
|
||||
table: scenesFilesTable,
|
||||
onClause: "scenes.id = scenes_files.scene_id",
|
||||
},
|
||||
join{
|
||||
table: fileTable,
|
||||
onClause: "scenes_files.file_id = files.id",
|
||||
},
|
||||
join{
|
||||
table: fingerprintTable,
|
||||
onClause: "scenes_files.file_id = files_fingerprints.file_id AND files_fingerprints.type = 'phash'",
|
||||
},
|
||||
join{
|
||||
table: videoFileTable,
|
||||
onClause: "files.id = video_files.file_id",
|
||||
},
|
||||
)
|
||||
|
||||
if distance == 0 {
|
||||
query.columns = []string{
|
||||
"scenes.id as scene_id",
|
||||
"video_files.duration as file_duration",
|
||||
"files.size as file_size",
|
||||
"files_fingerprints.fingerprint as phash",
|
||||
"abs(max(video_files.duration) OVER (PARTITION by files_fingerprints.fingerprint) - video_files.duration) as durationDiff",
|
||||
}
|
||||
|
||||
sqlStr := query.toSQL(false)
|
||||
|
||||
finalQuery := `
|
||||
SELECT GROUP_CONCAT(DISTINCT scene_id) as ids
|
||||
FROM (` + sqlStr + `)
|
||||
WHERE durationDiff <= ?
|
||||
OR ? < 0
|
||||
GROUP BY phash
|
||||
HAVING COUNT(phash) > 1
|
||||
AND COUNT(DISTINCT scene_id) > 1
|
||||
ORDER BY SUM(file_size) DESC;
|
||||
`
|
||||
var ids []string
|
||||
if err := dbWrapper.Select(ctx, &ids, findExactDuplicateQuery, durationDiff); err != nil {
|
||||
args := append(query.allArgs(), durationDiff, durationDiff)
|
||||
if err := dbWrapper.Select(ctx, &ids, finalQuery, args...); err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
|
|
@ -1452,9 +1499,18 @@ func (qb *SceneStore) FindDuplicates(ctx context.Context, distance int, duration
|
|||
}
|
||||
}
|
||||
} else {
|
||||
query.columns = []string{
|
||||
"scenes.id as id",
|
||||
"files_fingerprints.fingerprint as phash",
|
||||
"video_files.duration as duration",
|
||||
}
|
||||
query.sortAndPagination = " ORDER BY files.size DESC"
|
||||
|
||||
sqlStr := query.toSQL(true)
|
||||
|
||||
var hashes []*utils.Phash
|
||||
|
||||
if err := sceneRepository.queryFunc(ctx, findAllPhashesQuery, nil, false, func(rows *sqlx.Rows) error {
|
||||
if err := sceneRepository.queryFunc(ctx, sqlStr, query.allArgs(), false, func(rows *sqlx.Rows) error {
|
||||
phash := utils.Phash{
|
||||
Bucket: -1,
|
||||
Duration: -1,
|
||||
|
|
|
|||
Loading…
Reference in a new issue