fix(sqlite): fix image duplicate detection by scanning phash as integer

This fixes a bug where identical image duplicates were not being detected.

The implementation was incorrectly scanning the phash BLOB into a string and then attempting to parse that string as hex. Since phashes are stored as 64-bit integers, scanning them into a string produced their decimal representation rather than a hex string. For phashes with the MSB set (negative when treated as int64), the resulting decimal string started with a '-', which caused the hex parser to fail; the parse error was swallowed, so the image was silently skipped and never considered for duplicate detection.

Additionally, even for non-negative phashes, parsing a decimal string as hex yielded incorrect hash values.

Scanning directly into the utils.Phash struct (which uses int64) matches how Scene phashes are handled and ensures the hash values are correct.
This commit is contained in:
notsafeforgit 2026-03-20 04:53:39 -07:00
parent b087b6b62a
commit fcc5b51bfd

View file

@ -1105,29 +1105,13 @@ func (qb *ImageStore) FindDuplicates(ctx context.Context, distance int) ([][]*mo
var hashes []*utils.Phash
if err := imageRepository.queryFunc(ctx, query, nil, false, func(rows *sqlx.Rows) error {
var sq struct {
ID int `db:"id"`
Phash *string `db:"phash"`
}
if err := rows.StructScan(&sq); err != nil {
return err
}
if sq.Phash == nil {
return nil
}
hashInt, err := utils.StringToPhash(*sq.Phash)
if err != nil {
return nil
}
phash := utils.Phash{
ID: sq.ID,
Hash: hashInt,
Bucket: -1,
Duration: -1,
}
if err := rows.StructScan(&phash); err != nil {
return err
}
hashes = append(hashes, &phash)
return nil