From 0874852fa880e4a38beb368049164ef65b83f79a Mon Sep 17 00:00:00 2001 From: WithoutPants <53250216+WithoutPants@users.noreply.github.com> Date: Sat, 8 Aug 2020 11:22:25 +1000 Subject: [PATCH] Improve oshash collision detection and logging (#713) * Log colliding file when setting hash * Check for existing using both hashes --- pkg/manager/task_scan.go | 34 +++++++++++++++++++++++++++------- 1 file changed, 27 insertions(+), 7 deletions(-) diff --git a/pkg/manager/task_scan.go b/pkg/manager/task_scan.go index fde1dd4c1..9e2742ee4 100644 --- a/pkg/manager/task_scan.go +++ b/pkg/manager/task_scan.go @@ -178,12 +178,20 @@ func (t *ScanTask) scanScene() { return } + // check if oshash clashes with existing scene + dupe, _ := qb.FindByOSHash(oshash) + if dupe != nil { + logger.Errorf("OSHash for file %s is the same as that of %s", t.FilePath, dupe.Path) + return + } + ctx := context.TODO() tx := database.DB.MustBeginTx(ctx, nil) err = qb.UpdateOSHash(scene.ID, oshash, tx) if err != nil { logger.Error(err.Error()) - _ = tx.Rollback() + tx.Rollback() + return } else if err := tx.Commit(); err != nil { logger.Error(err.Error()) } @@ -197,6 +205,13 @@ func (t *ScanTask) scanScene() { return } + // check if checksum clashes with existing scene + dupe, _ := qb.FindByChecksum(checksum) + if dupe != nil { + logger.Errorf("MD5 for file %s is the same as that of %s", t.FilePath, dupe.Path) + return + } + ctx := context.TODO() tx := database.DB.MustBeginTx(ctx, nil) err = qb.UpdateChecksum(scene.ID, checksum, tx) @@ -240,15 +255,20 @@ func (t *ScanTask) scanScene() { } } + // check for scene by checksum and oshash - MD5 should be + // redundant, but check both + if checksum != "" { + scene, _ = qb.FindByChecksum(checksum) + } + + if scene == nil { + scene, _ = qb.FindByOSHash(oshash) + } + sceneHash := oshash + if t.fileNamingAlgorithm == models.HashAlgorithmMd5 { sceneHash = checksum - scene, _ = qb.FindByChecksum(sceneHash) - } else if t.fileNamingAlgorithm == models.HashAlgorithmOshash { - scene, _ = qb.FindByOSHash(sceneHash) - } else { - logger.Error("unknown file naming algorithm") - return } t.makeScreenshots(videoFile, sceneHash)