Mirror of https://github.com/stashapp/stash.git
Merge 765a6f6fc4 into 01a7583364
This commit is contained in: 3a3d2b3575
6 changed files with 74 additions and 50 deletions
@@ -51,6 +51,7 @@ type Query {
     Fractional seconds are OK: a value of 0.5 means that only files whose durations are within 0.5 seconds of each other will be matched based on PHash distance.
     """
     duration_diff: Float
+    scene_filter: SceneFilterType
   ): [[Scene!]!]!
 
   "Return valid stream paths"
@@ -227,7 +227,7 @@ func (r *queryResolver) ParseSceneFilenames(ctx context.Context, filter *models.
 	return ret, nil
 }
 
-func (r *queryResolver) FindDuplicateScenes(ctx context.Context, distance *int, durationDiff *float64) (ret [][]*models.Scene, err error) {
+func (r *queryResolver) FindDuplicateScenes(ctx context.Context, distance *int, durationDiff *float64, sceneFilter *models.SceneFilterType) (ret [][]*models.Scene, err error) {
 	dist := 0
 	durDiff := -1.
 	if distance != nil {
@@ -237,7 +237,7 @@ func (r *queryResolver) FindDuplicateScenes(ctx context.Context, distance *int,
 		durDiff = *durationDiff
 	}
 	if err := r.withReadTxn(ctx, func(ctx context.Context) error {
-		ret, err = r.repository.Scene.FindDuplicates(ctx, dist, durDiff)
+		ret, err = r.repository.Scene.FindDuplicates(ctx, dist, durDiff, sceneFilter)
 		return err
 	}); err != nil {
 		return nil, err
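The resolver's defaulting lines are elided between these two hunks; reconstructed from the context lines on either side, they presumably read as follows:

// Reconstructed from the surrounding context lines: nil arguments
// fall back to sentinels, and a negative durDiff disables the
// duration check in the store layer.
dist := 0
durDiff := -1.
if distance != nil {
	dist = *distance
}
if durationDiff != nil {
	durDiff = *durationDiff
}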
@@ -664,13 +664,13 @@ func (_m *SceneReaderWriter) FindByPrimaryFileID(ctx context.Context, fileID mod
 	return r0, r1
 }
 
-// FindDuplicates provides a mock function with given fields: ctx, distance, durationDiff
-func (_m *SceneReaderWriter) FindDuplicates(ctx context.Context, distance int, durationDiff float64) ([][]*models.Scene, error) {
-	ret := _m.Called(ctx, distance, durationDiff)
+// FindDuplicates provides a mock function with given fields: ctx, distance, durationDiff, filter
+func (_m *SceneReaderWriter) FindDuplicates(ctx context.Context, distance int, durationDiff float64, filter *models.SceneFilterType) ([][]*models.Scene, error) {
+	ret := _m.Called(ctx, distance, durationDiff, filter)
 
 	var r0 [][]*models.Scene
-	if rf, ok := ret.Get(0).(func(context.Context, int, float64) [][]*models.Scene); ok {
-		r0 = rf(ctx, distance, durationDiff)
+	if rf, ok := ret.Get(0).(func(context.Context, int, float64, *models.SceneFilterType) [][]*models.Scene); ok {
+		r0 = rf(ctx, distance, durationDiff, filter)
 	} else {
 		if ret.Get(0) != nil {
 			r0 = ret.Get(0).([][]*models.Scene)
@@ -678,8 +678,8 @@ func (_m *SceneReaderWriter) FindDuplicates(ctx context.Context, distance int, d
 	}
 
 	var r1 error
-	if rf, ok := ret.Get(1).(func(context.Context, int, float64) error); ok {
-		r1 = rf(ctx, distance, durationDiff)
+	if rf, ok := ret.Get(1).(func(context.Context, int, float64, *models.SceneFilterType) error); ok {
+		r1 = rf(ctx, distance, durationDiff, filter)
 	} else {
 		r1 = ret.Error(1)
 	}
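Tests that stub this mock need the extra argument in their expectations. A minimal sketch, assuming the mocks package generated from this file and stretchr/testify; the variable names are illustrative:

// Illustrative test wiring for the updated mock signature: a nil
// filter must now be matched explicitly as a typed nil.
db := &mocks.SceneReaderWriter{}
db.On("FindDuplicates", mock.Anything, 0, -1.0, (*models.SceneFilterType)(nil)).
	Return([][]*models.Scene{}, nil)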
@@ -27,7 +27,7 @@ type SceneFinder interface {
 	FindByPerformerID(ctx context.Context, performerID int) ([]*Scene, error)
 	FindByGalleryID(ctx context.Context, performerID int) ([]*Scene, error)
 	FindByGroupID(ctx context.Context, groupID int) ([]*Scene, error)
-	FindDuplicates(ctx context.Context, distance int, durationDiff float64) ([][]*Scene, error)
+	FindDuplicates(ctx context.Context, distance int, durationDiff float64, filter *SceneFilterType) ([][]*Scene, error)
 }
 
 // SceneQueryer provides methods to query scenes.
@@ -41,41 +41,6 @@ const (
 	sceneCoverBlobColumn = "cover_blob"
 )
 
-var findExactDuplicateQuery = `
-SELECT GROUP_CONCAT(DISTINCT scene_id) as ids
-FROM (
-	SELECT scenes.id as scene_id
-		, video_files.duration as file_duration
-		, files.size as file_size
-		, files_fingerprints.fingerprint as phash
-		, abs(max(video_files.duration) OVER (PARTITION by files_fingerprints.fingerprint) - video_files.duration) as durationDiff
-	FROM scenes
-	INNER JOIN scenes_files ON (scenes.id = scenes_files.scene_id)
-	INNER JOIN files ON (scenes_files.file_id = files.id)
-	INNER JOIN files_fingerprints ON (scenes_files.file_id = files_fingerprints.file_id AND files_fingerprints.type = 'phash')
-	INNER JOIN video_files ON (files.id == video_files.file_id)
-)
-WHERE durationDiff <= ?1
-	OR ?1 < 0 -- Always TRUE if the parameter is negative.
-	          -- That will disable the durationDiff checking.
-GROUP BY phash
-HAVING COUNT(phash) > 1
-	AND COUNT(DISTINCT scene_id) > 1
-ORDER BY SUM(file_size) DESC;
-`
-
-var findAllPhashesQuery = `
-SELECT scenes.id as id
-	, files_fingerprints.fingerprint as phash
-	, video_files.duration as duration
-FROM scenes
-INNER JOIN scenes_files ON (scenes.id = scenes_files.scene_id)
-INNER JOIN files ON (scenes_files.file_id = files.id)
-INNER JOIN files_fingerprints ON (scenes_files.file_id = files_fingerprints.file_id AND files_fingerprints.type = 'phash')
-INNER JOIN video_files ON (files.id == video_files.file_id)
-ORDER BY files.size DESC;
-`
-
 type sceneRow struct {
 	ID    int         `db:"id" goqu:"skipinsert"`
 	Title zero.String `db:"title"`
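The removed findExactDuplicateQuery computes, per file, the gap between its duration and the longest duration sharing its phash (the abs(max(...) OVER (PARTITION by ...)) column); the same windowed column reappears verbatim in the builder-based query added further down. A Go analogue of that column, for illustration only (requires import "math"):

// Go analogue of the SQL window column: for each duration in one
// phash group, the absolute gap to the group's longest duration.
func durationGaps(durations []float64) []float64 {
	maxDur := math.Inf(-1)
	for _, d := range durations {
		maxDur = math.Max(maxDur, d)
	}
	gaps := make([]float64, len(durations))
	for i, d := range durations {
		gaps[i] = math.Abs(maxDur - d)
	}
	return gaps
}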
@@ -1430,11 +1395,60 @@ func (qb *SceneStore) GetStashIDs(ctx context.Context, sceneID int) ([]models.St
 	return sceneRepository.stashIDs.get(ctx, sceneID)
 }
 
-func (qb *SceneStore) FindDuplicates(ctx context.Context, distance int, durationDiff float64) ([][]*models.Scene, error) {
+func (qb *SceneStore) FindDuplicates(ctx context.Context, distance int, durationDiff float64, filter *models.SceneFilterType) ([][]*models.Scene, error) {
 	var dupeIds [][]int
+
+	query, err := qb.makeQuery(ctx, filter, nil)
+	if err != nil {
+		return nil, err
+	}
+
+	// Add necessary joins for duplicate checking
+	query.addJoins(
+		join{
+			table:    scenesFilesTable,
+			onClause: "scenes.id = scenes_files.scene_id",
+		},
+		join{
+			table:    fileTable,
+			onClause: "scenes_files.file_id = files.id",
+		},
+		join{
+			table:    fingerprintTable,
+			onClause: "scenes_files.file_id = files_fingerprints.file_id AND files_fingerprints.type = 'phash'",
+		},
+		join{
+			table:    videoFileTable,
+			onClause: "files.id = video_files.file_id",
+		},
+	)
+
 	if distance == 0 {
+		query.columns = []string{
+			"scenes.id as scene_id",
+			"video_files.duration as file_duration",
+			"files.size as file_size",
+			"files_fingerprints.fingerprint as phash",
+			"abs(max(video_files.duration) OVER (PARTITION by files_fingerprints.fingerprint) - video_files.duration) as durationDiff",
+		}
+
+		sqlStr := query.toSQL(false)
+
+		finalQuery := `
+			SELECT GROUP_CONCAT(DISTINCT scene_id) as ids
+			FROM (` + sqlStr + `)
+			WHERE phash IS NOT NULL
+			AND (durationDiff <= ?
+				OR ? < 0)
+			GROUP BY phash
+			HAVING COUNT(phash) > 1
+			AND COUNT(DISTINCT scene_id) > 1
+			ORDER BY SUM(file_size) DESC;
+		`
+
 		var ids []string
-		if err := dbWrapper.Select(ctx, &ids, findExactDuplicateQuery, durationDiff); err != nil {
+		args := append(query.allArgs(), durationDiff, durationDiff)
+		if err := dbWrapper.Select(ctx, &ids, finalQuery, args...); err != nil {
 			return nil, err
 		}
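Note the double binding of durationDiff: the inlined WHERE clause uses two positional ? placeholders where the removed static query reused a single numbered ?1, so the same value is appended twice to the argument list. The predicate it encodes, sketched in Go for clarity:

// The WHERE clause above, evaluated row by row: a negative
// durationDiff makes the second disjunct true, which disables
// duration checking entirely.
func durationOK(rowDurationDiff, durationDiff float64) bool {
	return rowDurationDiff <= durationDiff || durationDiff < 0
}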
@@ -1452,9 +1466,18 @@ func (qb *SceneStore) FindDuplicates(ctx context.Context, distance int, duration
 			}
 		}
 	} else {
+		query.columns = []string{
+			"scenes.id as id",
+			"files_fingerprints.fingerprint as phash",
+			"video_files.duration as duration",
+		}
+		query.sortAndPagination = " ORDER BY files.size DESC"
+
+		sqlStr := query.toSQL(true)
+
 		var hashes []*utils.Phash
 
-		if err := sceneRepository.queryFunc(ctx, findAllPhashesQuery, nil, false, func(rows *sqlx.Rows) error {
+		if err := sceneRepository.queryFunc(ctx, sqlStr, query.allArgs(), false, func(rows *sqlx.Rows) error {
 			phash := utils.Phash{
 				Bucket:   -1,
 				Duration: -1,
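Putting it together, a caller-side sketch of the new store signature; the Organized field of models.SceneFilterType is assumed here purely for illustration:

// Illustrative call: look for duplicates among organized scenes only,
// tolerating up to half a second of duration difference.
organized := true
filter := &models.SceneFilterType{Organized: &organized}
groups, err := qb.FindDuplicates(ctx, 0, 0.5, filter)
if err != nil {
	return nil, err
}
// Each inner slice of groups is one set of duplicate scenes.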
@@ -4631,7 +4631,7 @@ func TestSceneStore_FindDuplicates(t *testing.T) {
 	withRollbackTxn(func(ctx context.Context) error {
 		distance := 0
 		durationDiff := -1.
-		got, err := qb.FindDuplicates(ctx, distance, durationDiff)
+		got, err := qb.FindDuplicates(ctx, distance, durationDiff, nil)
 		if err != nil {
 			t.Errorf("SceneStore.FindDuplicates() error = %v", err)
 			return nil
@@ -4641,7 +4641,7 @@ func TestSceneStore_FindDuplicates(t *testing.T) {
 
 		distance = 1
 		durationDiff = -1.
-		got, err = qb.FindDuplicates(ctx, distance, durationDiff)
+		got, err = qb.FindDuplicates(ctx, distance, durationDiff, nil)
 		if err != nil {
 			t.Errorf("SceneStore.FindDuplicates() error = %v", err)
 			return nil