[Files Refactor] Don't require fingerprint calculation post-migrate (#2892)

This commit is contained in:
WithoutPants 2022-09-07 14:21:10 +10:00 committed by GitHub
parent cc9fc2150e
commit 9e08edc76f
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
4 changed files with 125 additions and 41 deletions

View file

@ -7,7 +7,6 @@ import (
"errors"
"fmt"
"io"
"io/ioutil"
"os"
"path/filepath"
"time"
@ -196,7 +195,7 @@ func (t *ImportTask) ImportPerformers(ctx context.Context) {
logger.Info("[performers] importing")
path := t.json.json.Performers
files, err := ioutil.ReadDir(path)
files, err := os.ReadDir(path)
if err != nil {
if !errors.Is(err, os.ErrNotExist) {
logger.Errorf("[performers] failed to read performers directory: %v", err)
@ -239,7 +238,7 @@ func (t *ImportTask) ImportStudios(ctx context.Context) {
logger.Info("[studios] importing")
path := t.json.json.Studios
files, err := ioutil.ReadDir(path)
files, err := os.ReadDir(path)
if err != nil {
if !errors.Is(err, os.ErrNotExist) {
logger.Errorf("[studios] failed to read studios directory: %v", err)
@ -328,7 +327,7 @@ func (t *ImportTask) ImportMovies(ctx context.Context) {
logger.Info("[movies] importing")
path := t.json.json.Movies
files, err := ioutil.ReadDir(path)
files, err := os.ReadDir(path)
if err != nil {
if !errors.Is(err, os.ErrNotExist) {
logger.Errorf("[movies] failed to read movies directory: %v", err)
@ -373,7 +372,7 @@ func (t *ImportTask) ImportFiles(ctx context.Context) {
logger.Info("[files] importing")
path := t.json.json.Files
files, err := ioutil.ReadDir(path)
files, err := os.ReadDir(path)
if err != nil {
if !errors.Is(err, os.ErrNotExist) {
logger.Errorf("[files] failed to read files directory: %v", err)
@ -463,7 +462,7 @@ func (t *ImportTask) ImportGalleries(ctx context.Context) {
logger.Info("[galleries] importing")
path := t.json.json.Galleries
files, err := ioutil.ReadDir(path)
files, err := os.ReadDir(path)
if err != nil {
if !errors.Is(err, os.ErrNotExist) {
logger.Errorf("[galleries] failed to read galleries directory: %v", err)
@ -515,7 +514,7 @@ func (t *ImportTask) ImportTags(ctx context.Context) {
logger.Info("[tags] importing")
path := t.json.json.Tags
files, err := ioutil.ReadDir(path)
files, err := os.ReadDir(path)
if err != nil {
if !errors.Is(err, os.ErrNotExist) {
logger.Errorf("[tags] failed to read tags directory: %v", err)
@ -650,7 +649,7 @@ func (t *ImportTask) ImportScenes(ctx context.Context) {
logger.Info("[scenes] importing")
path := t.json.json.Scenes
files, err := ioutil.ReadDir(path)
files, err := os.ReadDir(path)
if err != nil {
if !errors.Is(err, os.ErrNotExist) {
logger.Errorf("[scenes] failed to read scenes directory: %v", err)
@ -727,7 +726,7 @@ func (t *ImportTask) ImportImages(ctx context.Context) {
logger.Info("[images] importing")
path := t.json.json.Images
files, err := ioutil.ReadDir(path)
files, err := os.ReadDir(path)
if err != nil {
if !errors.Is(err, os.ErrNotExist) {
logger.Errorf("[images] failed to read images directory: %v", err)

View file

@ -343,8 +343,10 @@ func (s *scanJob) processQueue(ctx context.Context) error {
return nil
}
func (s *scanJob) incrementProgress() {
if s.ProgressReports != nil {
func (s *scanJob) incrementProgress(f scanFile) {
// don't increment for files inside zip files since these aren't
// counted during the initial walking
if s.ProgressReports != nil && f.zipFile == nil {
s.ProgressReports.Increment()
}
}
@ -418,7 +420,7 @@ func (s *scanJob) handleFolder(ctx context.Context, file scanFile) error {
path := file.Path
return s.withTxn(ctx, func(ctx context.Context) error {
defer s.incrementProgress()
defer s.incrementProgress(file)
// determine if folder already exists in data store (by path)
f, err := s.Repository.FolderStore.FindByPath(ctx, path)
@ -579,7 +581,7 @@ func (s *scanJob) onNewFile(ctx context.Context, f scanFile) (File, error) {
// add this file to the queue to be created later
if s.retrying {
// if we're retrying and the folder still doesn't exist, then it's a problem
s.incrementProgress()
s.incrementProgress(f)
return nil, fmt.Errorf("parent folder for %q doesn't exist", path)
}
@ -591,7 +593,7 @@ func (s *scanJob) onNewFile(ctx context.Context, f scanFile) (File, error) {
zipFileID, err := s.getZipFileID(ctx, f.zipFile)
if err != nil {
s.incrementProgress()
s.incrementProgress(f)
return nil, err
}
@ -601,7 +603,7 @@ func (s *scanJob) onNewFile(ctx context.Context, f scanFile) (File, error) {
fp, err := s.calculateFingerprints(f.fs, baseFile, path)
if err != nil {
s.incrementProgress()
s.incrementProgress(f)
return nil, err
}
@ -609,7 +611,7 @@ func (s *scanJob) onNewFile(ctx context.Context, f scanFile) (File, error) {
file, err := s.fireDecorators(ctx, f.fs, baseFile)
if err != nil {
s.incrementProgress()
s.incrementProgress(f)
return nil, err
}
@ -617,7 +619,7 @@ func (s *scanJob) onNewFile(ctx context.Context, f scanFile) (File, error) {
// do this after decoration so that missing fields can be populated
renamed, err := s.handleRename(ctx, file, fp)
if err != nil {
s.incrementProgress()
s.incrementProgress(f)
return nil, err
}
@ -785,6 +787,63 @@ func (s *scanJob) isHandlerRequired(ctx context.Context, f File) bool {
return accept
}
// isMissingMetadata reports whether the provided file carries the sentinel
// "missing" values written by schema migration 32: -1 for numeric fields and
// "unset" for string fields.
// The fields inspected are:
//   - base file size
//   - image: format, width, height
//   - video: video codec, audio codec, format, width, height, frame rate, bit rate
func (s *scanJob) isMissingMetadata(existing File) bool {
const (
unsetString = "unset"
unsetNumber = -1
)

// size lives on the base file regardless of concrete type
if existing.Base().Size == unsetNumber {
return true
}

switch tf := existing.(type) {
case *ImageFile:
if tf.Format == unsetString {
return true
}
return tf.Width == unsetNumber || tf.Height == unsetNumber
case *VideoFile:
switch {
case tf.VideoCodec == unsetString, tf.AudioCodec == unsetString, tf.Format == unsetString:
return true
case tf.Width == unsetNumber, tf.Height == unsetNumber:
return true
default:
return tf.FrameRate == unsetNumber || tf.BitRate == unsetNumber
}
default:
// other file types carry no extra metadata to check
return false
}
}
// setMissingMetadata repopulates metadata for a file that was flagged as
// missing it (see isMissingMetadata): it copies the size from the scanned
// file, re-runs the decorators to fill in format-specific fields, then
// persists the result. Returns the updated file, or an error if decoration
// or the database update fails.
func (s *scanJob) setMissingMetadata(ctx context.Context, f scanFile, existing File) (File, error) {
path := existing.Base().Path
logger.Infof("Setting missing metadata for %s", path)

// size comes straight from the filesystem entry
existing.Base().Size = f.Size

decorated, err := s.fireDecorators(ctx, f.fs, existing)
if err != nil {
return nil, err
}
existing = decorated

// persist the repopulated file inside a transaction
txnErr := s.withTxn(ctx, func(ctx context.Context) error {
if err := s.Repository.Update(ctx, existing); err != nil {
return fmt.Errorf("updating file %q: %w", path, err)
}
return nil
})
if txnErr != nil {
return nil, txnErr
}

return existing, nil
}
// returns a file only if it was updated
func (s *scanJob) onExistingFile(ctx context.Context, f scanFile, existing File) (File, error) {
base := existing.Base()
@ -794,6 +853,16 @@ func (s *scanJob) onExistingFile(ctx context.Context, f scanFile, existing File)
updated := !fileModTime.Equal(base.ModTime)
if !updated {
isMissingMetdata := s.isMissingMetadata(existing)
// set missing information
if isMissingMetdata {
var err error
existing, err = s.setMissingMetadata(ctx, f, existing)
if err != nil {
return nil, err
}
}
handlerRequired := false
if err := s.withDB(ctx, func(ctx context.Context) error {
// check if the handler needs to be run
@ -804,7 +873,14 @@ func (s *scanJob) onExistingFile(ctx context.Context, f scanFile, existing File)
}
if !handlerRequired {
s.incrementProgress()
s.incrementProgress(f)
// if this file is a zip file, then we need to rescan the contents
// as well. We do this by returning the file, instead of nil.
if isMissingMetdata {
return existing, nil
}
return nil, nil
}
@ -813,12 +889,18 @@ func (s *scanJob) onExistingFile(ctx context.Context, f scanFile, existing File)
return err
}
s.incrementProgress()
s.incrementProgress(f)
return nil
}); err != nil {
return nil, err
}
// if this file is a zip file, then we need to rescan the contents
// as well. We do this by returning the file, instead of nil.
if isMissingMetdata {
return existing, nil
}
return nil, nil
}
@ -830,7 +912,7 @@ func (s *scanJob) onExistingFile(ctx context.Context, f scanFile, existing File)
// calculate and update fingerprints for the file
fp, err := s.calculateFingerprints(f.fs, base, path)
if err != nil {
s.incrementProgress()
s.incrementProgress(f)
return nil, err
}
@ -838,7 +920,7 @@ func (s *scanJob) onExistingFile(ctx context.Context, f scanFile, existing File)
existing, err = s.fireDecorators(ctx, f.fs, existing)
if err != nil {
s.incrementProgress()
s.incrementProgress(f)
return nil, err
}

View file

@ -4,7 +4,7 @@ import (
"bytes"
"errors"
"fmt"
"io/ioutil"
"io"
"os"
"path/filepath"
"strings"
@ -104,7 +104,7 @@ func LoadFileFile(filePath string) (DirEntry, error) {
}
defer r.Close()
data, err := ioutil.ReadAll(r)
data, err := io.ReadAll(r)
if err != nil {
return nil, err
}

View file

@ -170,9 +170,9 @@ INSERT INTO `files`
SELECT
`path`,
1,
COALESCE(`size`, 0),
-- set mod time to epoch so that the format/size is calculated on scan
'1970-01-01 00:00:00',
-- special value if null so that it is recalculated
COALESCE(`size`, -1),
COALESCE(`file_mod_time`, '1970-01-01 00:00:00'),
`created_at`,
`updated_at`
FROM `images`;
@ -186,9 +186,10 @@ INSERT INTO `image_files`
)
SELECT
`files`.`id`,
'',
COALESCE(`images`.`width`, 0),
COALESCE(`images`.`height`, 0)
-- special values so that they are recalculated
'unset',
COALESCE(`images`.`width`, -1),
COALESCE(`images`.`height`, -1)
FROM `images` INNER JOIN `files` ON `images`.`path` = `files`.`basename` AND `files`.`parent_folder_id` = 1;
INSERT INTO `images_files`
@ -280,8 +281,9 @@ INSERT INTO `files`
SELECT
`path`,
1,
0,
'1970-01-01 00:00:00', -- set to placeholder so that size is updated
-- special value so that it is recalculated
-1,
COALESCE(`file_mod_time`, '1970-01-01 00:00:00'),
`created_at`,
`updated_at`
FROM `galleries`
@ -433,9 +435,9 @@ INSERT INTO `files`
SELECT
`path`,
1,
COALESCE(`size`, 0),
-- set mod time to epoch so that the format/size is calculated on scan
'1970-01-01 00:00:00',
-- special value if null so that it is recalculated
COALESCE(`size`, -1),
COALESCE(`file_mod_time`, '1970-01-01 00:00:00'),
`created_at`,
`updated_at`
FROM `scenes`;
@ -457,13 +459,14 @@ INSERT INTO `video_files`
SELECT
`files`.`id`,
`scenes`.`duration`,
COALESCE(`scenes`.`video_codec`, ''),
COALESCE(`scenes`.`format`, ''),
COALESCE(`scenes`.`audio_codec`, ''),
COALESCE(`scenes`.`width`, 0),
COALESCE(`scenes`.`height`, 0),
COALESCE(`scenes`.`framerate`, 0),
COALESCE(`scenes`.`bitrate`, 0),
-- special values for unset to be updated during scan
COALESCE(`scenes`.`video_codec`, 'unset'),
COALESCE(`scenes`.`format`, 'unset'),
COALESCE(`scenes`.`audio_codec`, 'unset'),
COALESCE(`scenes`.`width`, -1),
COALESCE(`scenes`.`height`, -1),
COALESCE(`scenes`.`framerate`, -1),
COALESCE(`scenes`.`bitrate`, -1),
`scenes`.`interactive`,
`scenes`.`interactive_speed`
FROM `scenes` INNER JOIN `files` ON `scenes`.`path` = `files`.`basename` AND `files`.`parent_folder_id` = 1;