Improve oshash collision detection and logging (#713)

* Log colliding file when setting hash
* Check for an existing scene using both hashes (MD5 checksum and oshash)
This commit is contained in:
WithoutPants 2020-08-08 11:22:25 +10:00 committed by GitHub
parent 5992ff8706
commit 0874852fa8
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23

View file

@@ -178,12 +178,20 @@ func (t *ScanTask) scanScene() {
return return
} }
// check if oshash clashes with existing scene
dupe, _ := qb.FindByOSHash(oshash)
if dupe != nil {
logger.Errorf("OSHash for file %s is the same as that of %s", t.FilePath, dupe.Path)
return
}
ctx := context.TODO() ctx := context.TODO()
tx := database.DB.MustBeginTx(ctx, nil) tx := database.DB.MustBeginTx(ctx, nil)
err = qb.UpdateOSHash(scene.ID, oshash, tx) err = qb.UpdateOSHash(scene.ID, oshash, tx)
if err != nil { if err != nil {
logger.Error(err.Error()) logger.Error(err.Error())
_ = tx.Rollback() tx.Rollback()
return
} else if err := tx.Commit(); err != nil { } else if err := tx.Commit(); err != nil {
logger.Error(err.Error()) logger.Error(err.Error())
} }
@@ -197,6 +205,13 @@ func (t *ScanTask) scanScene() {
return return
} }
// check if checksum clashes with existing scene
dupe, _ := qb.FindByChecksum(checksum)
if dupe != nil {
logger.Errorf("MD5 for file %s is the same as that of %s", t.FilePath, dupe.Path)
return
}
ctx := context.TODO() ctx := context.TODO()
tx := database.DB.MustBeginTx(ctx, nil) tx := database.DB.MustBeginTx(ctx, nil)
err = qb.UpdateChecksum(scene.ID, checksum, tx) err = qb.UpdateChecksum(scene.ID, checksum, tx)
@@ -240,15 +255,20 @@ func (t *ScanTask) scanScene() {
} }
} }
// check for scene by checksum and oshash - MD5 should be
// redundant, but check both
if checksum != "" {
scene, _ = qb.FindByChecksum(checksum)
}
if scene == nil {
scene, _ = qb.FindByOSHash(oshash)
}
sceneHash := oshash sceneHash := oshash
if t.fileNamingAlgorithm == models.HashAlgorithmMd5 { if t.fileNamingAlgorithm == models.HashAlgorithmMd5 {
sceneHash = checksum sceneHash = checksum
scene, _ = qb.FindByChecksum(sceneHash)
} else if t.fileNamingAlgorithm == models.HashAlgorithmOshash {
scene, _ = qb.FindByOSHash(sceneHash)
} else {
logger.Error("unknown file naming algorithm")
return
} }
t.makeScreenshots(videoFile, sceneHash) t.makeScreenshots(videoFile, sceneHash)