mirror of
https://github.com/stashapp/stash.git
synced 2025-12-06 08:26:00 +01:00
* Find existing files with case insensitivity if the filesystem is case insensitive. * Handle case changes in folders. * Optimise to only test file system case sensitivity if the first query found nothing. This limits the overhead to new paths, and adds an extra query for new paths on Windows installs.
1208 lines · 30 KiB · Go
package file
|
|
|
|
import (
|
|
"context"
|
|
"errors"
|
|
"fmt"
|
|
"io/fs"
|
|
"os"
|
|
"path/filepath"
|
|
"strings"
|
|
"sync"
|
|
"time"
|
|
|
|
"github.com/remeh/sizedwaitgroup"
|
|
"github.com/stashapp/stash/pkg/logger"
|
|
"github.com/stashapp/stash/pkg/models"
|
|
"github.com/stashapp/stash/pkg/txn"
|
|
"github.com/stashapp/stash/pkg/utils"
|
|
)
|
|
|
|
const (
	// scanQueueSize is the buffer size of the file queue; it is large so the
	// filesystem walker rarely blocks waiting on the file processors.
	scanQueueSize = 200000

	// maximum number of times to retry in the event of a locked database
	// use -1 to retry forever
	maxRetries = -1
)
|
|
|
|
// Scanner scans files into the database.
//
// The scan process works using two goroutines. The first walks through the provided paths
// in the filesystem. It runs each directory entry through the provided ScanFilters. If none
// of the filter Accept methods return true, then the file/directory is ignored.
// Any folders found are handled immediately. Files inside zip files are also handled immediately.
// All other files encountered are sent to the second goroutine queue.
//
// Folders are handled by checking if the folder exists in the database, by its full path.
// If a folder entry already exists, then its mod time is updated (if applicable).
// If the folder does not exist in the database, then a new folder entry is created.
//
// Files are handled by first querying for the file by its path. If the file entry exists in the
// database, then the mod time is compared to the value in the database. If the mod time is different
// then file is marked as updated - it recalculates any fingerprints and fires decorators, then
// the file entry is updated and any applicable handlers are fired.
//
// If the file entry does not exist in the database, then fingerprints are calculated for the file.
// It then determines if the file is a rename of an existing file by querying for file entries with
// the same fingerprint. If any are found, it checks each to see if any are missing in the file
// system. If one is, then the file is treated as renamed and its path is updated. If none are missing,
// or many are, then the file is treated as a new file.
//
// If the file is not a renamed file, then the decorators are fired and the file is created, then
// the applicable handlers are fired.
type Scanner struct {
	// FS is the file system that is walked and read during scanning.
	FS models.FS
	// Repository provides access to the file and folder data store.
	Repository Repository
	// FingerprintCalculator calculates fingerprints for scanned files.
	FingerprintCalculator FingerprintCalculator

	// FileDecorators are applied to files as they are scanned.
	FileDecorators []Decorator
}
|
|
|
|
// FingerprintCalculator calculates a fingerprint for the provided file.
type FingerprintCalculator interface {
	// CalculateFingerprints returns fingerprints for f, reading the file's
	// contents via o. When useExisting is true, implementations may reuse
	// existing fingerprint values instead of recalculating them.
	CalculateFingerprints(f *models.BaseFile, o Opener, useExisting bool) ([]models.Fingerprint, error)
}
|
|
|
|
// Decorator wraps the Decorate method to add additional functionality while scanning files.
type Decorator interface {
	// Decorate returns f with additional metadata populated, reading the file
	// via fs as needed.
	Decorate(ctx context.Context, fs models.FS, f models.File) (models.File, error)
	// IsMissingMetadata reports whether f is missing metadata that this
	// decorator would populate.
	IsMissingMetadata(ctx context.Context, fs models.FS, f models.File) bool
}
|
|
|
|
// FilteredDecorator applies its embedded Decorator only to files accepted by
// its embedded Filter.
type FilteredDecorator struct {
	Decorator
	Filter
}
|
|
|
|
// Decorate runs the decorator if the filter accepts the file.
|
|
func (d *FilteredDecorator) Decorate(ctx context.Context, fs models.FS, f models.File) (models.File, error) {
|
|
if d.Accept(ctx, f) {
|
|
return d.Decorator.Decorate(ctx, fs, f)
|
|
}
|
|
return f, nil
|
|
}
|
|
|
|
func (d *FilteredDecorator) IsMissingMetadata(ctx context.Context, fs models.FS, f models.File) bool {
|
|
if d.Accept(ctx, f) {
|
|
return d.Decorator.IsMissingMetadata(ctx, fs, f)
|
|
}
|
|
|
|
return false
|
|
}
|
|
|
|
// ProgressReporter is used to report progress of the scan.
type ProgressReporter interface {
	// AddTotal adds total to the overall number of progress items.
	AddTotal(total int)
	// Increment marks one progress item as completed.
	Increment()
	// Definite marks the progress total as known.
	Definite()
	// ExecuteTask runs fn, reporting it as a task with the given description.
	ExecuteTask(description string, fn func())
}
|
|
|
|
// scanJob holds the state for a single scan operation.
type scanJob struct {
	*Scanner

	// handlers are called after a file has been scanned.
	handlers []Handler

	ProgressReports ProgressReporter
	options ScanOptions

	// startTime is when the scan began.
	startTime time.Time
	// fileQueue passes files from the walker goroutine to the processors.
	fileQueue chan scanFile
	// retryList collects files whose parent folder was not yet created;
	// they are reprocessed after the main queue drains.
	// NOTE(review): appended from worker goroutines in onNewFile without
	// synchronization - confirm this is race-free.
	retryList []scanFile
	// retrying is set once the main queue has drained and retryList is
	// being processed.
	retrying bool
	// folderPathToID caches folder path -> folder ID lookups.
	folderPathToID sync.Map
	// zipPathToID caches zip file path -> file ID lookups.
	zipPathToID sync.Map
	// count is the number of top-level files queued for processing.
	count int

	txnRetryer txn.Retryer
}
|
|
|
|
// ScanOptions provides options for scanning files.
type ScanOptions struct {
	// Paths are the top-level paths that are walked and scanned.
	Paths []string

	// ZipFileExtensions is a list of file extensions that are considered zip files.
	// Extension does not include the . character.
	ZipFileExtensions []string

	// ScanFilters are used to determine if a file should be scanned.
	ScanFilters []PathFilter

	// HandlerRequiredFilters are used to determine if an unchanged file needs to be handled
	HandlerRequiredFilters []Filter

	// ParallelTasks is the maximum number of files processed concurrently.
	ParallelTasks int

	// When true files in path will be rescanned even if they haven't changed
	Rescan bool
}
|
|
|
|
// Scan starts the scanning process.
|
|
func (s *Scanner) Scan(ctx context.Context, handlers []Handler, options ScanOptions, progressReporter ProgressReporter) {
|
|
job := &scanJob{
|
|
Scanner: s,
|
|
handlers: handlers,
|
|
ProgressReports: progressReporter,
|
|
options: options,
|
|
txnRetryer: txn.Retryer{
|
|
Manager: s.Repository.TxnManager,
|
|
Retries: maxRetries,
|
|
},
|
|
}
|
|
|
|
job.execute(ctx)
|
|
}
|
|
|
|
// scanFile couples a file's database representation with the file system it
// was found on and the file info from the directory walk.
type scanFile struct {
	*models.BaseFile
	// fs is the file system (possibly a zip FS) containing the file.
	fs models.FS
	// info is the file's stat information from the walk.
	info fs.FileInfo
}
|
|
|
|
// withTxn runs fn inside a database transaction, retrying via the job's
// txnRetryer (up to maxRetries times).
func (s *scanJob) withTxn(ctx context.Context, fn func(ctx context.Context) error) error {
	return s.txnRetryer.WithTxn(ctx, fn)
}
|
|
|
|
// withDB runs fn with database access without opening a transaction.
func (s *scanJob) withDB(ctx context.Context, fn func(ctx context.Context) error) error {
	return s.Repository.WithDB(ctx, fn)
}
|
|
|
|
// execute runs the scan: a producer goroutine walks the configured paths and
// queues files, while this goroutine consumes the queue and processes them.
func (s *scanJob) execute(ctx context.Context) {
	paths := s.options.Paths
	logger.Infof("scanning %d paths", len(paths))
	s.startTime = time.Now()

	s.fileQueue = make(chan scanFile, scanQueueSize)
	var wg sync.WaitGroup
	wg.Add(1)

	// producer: walk the filesystem and queue files. queueFiles closes
	// fileQueue when done, which terminates processQueue's loop.
	go func() {
		defer wg.Done()
		if err := s.queueFiles(ctx, paths); err != nil {
			if errors.Is(err, context.Canceled) {
				return
			}

			logger.Errorf("error queuing files for scan: %v", err)
			return
		}

		logger.Infof("Finished adding files to queue. %d files queued", s.count)
	}()

	// ensure the producer goroutine finishes before returning
	defer wg.Wait()

	// consumer: process queued files until the queue is closed
	if err := s.processQueue(ctx); err != nil {
		if errors.Is(err, context.Canceled) {
			return
		}

		logger.Errorf("error scanning files: %v", err)
		return
	}
}
|
|
|
|
// queueFiles walks each of the provided paths, queuing files for processing
// via queueFileFunc (folders and zip contents are handled inline by the walk
// function). The file queue is always closed when the walk finishes, and the
// progress total is set to the number of queued files.
func (s *scanJob) queueFiles(ctx context.Context, paths []string) error {
	var err error
	s.ProgressReports.ExecuteTask("Walking directory tree", func() {
		for _, p := range paths {
			err = symWalk(s.FS, p, s.queueFileFunc(ctx, s.FS, nil))
			if err != nil {
				// stop at the first failed path; err is returned below
				return
			}
		}
	})

	// close the queue even on error so the consumer goroutine terminates
	close(s.fileQueue)

	if s.ProgressReports != nil {
		s.ProgressReports.AddTotal(s.count)
		s.ProgressReports.Definite()
	}

	return err
}
|
|
|
|
// queueFileFunc returns a fs.WalkDirFunc that filters, classifies and queues
// directory entries found under f. zipFile is non-nil when walking the
// contents of a zip file, in which case files are processed immediately
// instead of queued. Folders are always handled immediately so that parent
// folder entries exist before their contained files are processed.
func (s *scanJob) queueFileFunc(ctx context.Context, f models.FS, zipFile *scanFile) fs.WalkDirFunc {
	return func(path string, d fs.DirEntry, err error) error {
		if err != nil {
			// don't let errors prevent scanning
			logger.Errorf("error scanning %s: %v", path, err)
			return nil
		}

		// abort the walk if the context has been cancelled
		if err = ctx.Err(); err != nil {
			return err
		}

		info, err := d.Info()
		if err != nil {
			logger.Errorf("reading info for %q: %v", path, err)
			return nil
		}

		if !s.acceptEntry(ctx, path, info) {
			if info.IsDir() {
				// rejected directory: skip its entire subtree
				return fs.SkipDir
			}

			return nil
		}

		size, err := getFileSize(f, path, info)
		if err != nil {
			return err
		}

		ff := scanFile{
			BaseFile: &models.BaseFile{
				DirEntry: models.DirEntry{
					ModTime: modTime(info),
				},
				Path:     path,
				Basename: filepath.Base(path),
				Size:     size,
			},
			fs:   f,
			info: info,
		}

		// link files inside a zip to their containing zip file entry
		if zipFile != nil {
			zipFileID, err := s.getZipFileID(ctx, zipFile)
			if err != nil {
				return err
			}
			ff.ZipFileID = zipFileID
			ff.ZipFile = zipFile
		}

		if info.IsDir() {
			// handle folders immediately
			if err := s.handleFolder(ctx, ff); err != nil {
				if !errors.Is(err, context.Canceled) {
					logger.Errorf("error processing %q: %v", path, err)
				}

				// skip the directory since we won't be able to process the files anyway
				return fs.SkipDir
			}

			return nil
		}

		// if zip file is present, we handle immediately
		if zipFile != nil {
			s.ProgressReports.ExecuteTask("Scanning "+path, func() {
				if err := s.handleFile(ctx, ff); err != nil {
					if !errors.Is(err, context.Canceled) {
						logger.Errorf("error processing %q: %v", path, err)
					}
					// don't return an error, just skip the file
				}
			})

			return nil
		}

		s.fileQueue <- ff

		// count only top-level files - it becomes the progress total
		s.count++

		return nil
	}
}
|
|
|
|
func getFileSize(f models.FS, path string, info fs.FileInfo) (int64, error) {
|
|
// #2196/#3042 - replace size with target size if file is a symlink
|
|
if info.Mode()&os.ModeSymlink == os.ModeSymlink {
|
|
targetInfo, err := f.Stat(path)
|
|
if err != nil {
|
|
return 0, fmt.Errorf("reading info for symlink %q: %w", path, err)
|
|
}
|
|
return targetInfo.Size(), nil
|
|
}
|
|
|
|
return info.Size(), nil
|
|
}
|
|
|
|
func (s *scanJob) acceptEntry(ctx context.Context, path string, info fs.FileInfo) bool {
|
|
// always accept if there's no filters
|
|
accept := len(s.options.ScanFilters) == 0
|
|
for _, filter := range s.options.ScanFilters {
|
|
// accept if any filter accepts the file
|
|
if filter.Accept(ctx, path, info) {
|
|
accept = true
|
|
break
|
|
}
|
|
}
|
|
|
|
return accept
|
|
}
|
|
|
|
// scanZipFile opens f as a zip file system and walks its contents, handling
// each contained entry immediately via queueFileFunc. If the underlying FS
// does not support random access (errNotReaderAt), the zip contents are
// silently skipped.
func (s *scanJob) scanZipFile(ctx context.Context, f scanFile) error {
	zipFS, err := f.fs.OpenZip(f.Path, f.Size)
	if err != nil {
		if errors.Is(err, errNotReaderAt) {
			// can't walk the zip file
			// just return
			return nil
		}

		return err
	}

	defer zipFS.Close()

	return symWalk(zipFS, f.Path, s.queueFileFunc(ctx, zipFS, &f))
}
|
|
|
|
// processQueue consumes the file queue with up to ParallelTasks concurrent
// workers. Once the queue drains, any files that were deferred because their
// parent folder did not yet exist (retryList) are processed in a second pass
// with retrying set, which makes a still-missing parent a hard error.
func (s *scanJob) processQueue(ctx context.Context) error {
	parallelTasks := s.options.ParallelTasks
	if parallelTasks < 1 {
		parallelTasks = 1
	}

	wg := sizedwaitgroup.New(parallelTasks)

	// first pass: process everything queued by the walker
	if err := func() error {
		// wait for in-flight workers before the retry pass starts
		defer wg.Wait()

		for f := range s.fileQueue {
			if err := ctx.Err(); err != nil {
				return err
			}

			wg.Add()
			ff := f // capture loop variable for the goroutine
			go func() {
				defer wg.Done()
				s.processQueueItem(ctx, ff)
			}()
		}

		return nil
	}(); err != nil {
		return err
	}

	s.retrying = true

	// second pass: retry files whose parent folders were missing earlier
	if err := func() error {
		defer wg.Wait()

		for _, f := range s.retryList {
			if err := ctx.Err(); err != nil {
				return err
			}

			wg.Add()
			ff := f // capture loop variable for the goroutine
			go func() {
				defer wg.Done()
				s.processQueueItem(ctx, ff)
			}()
		}

		return nil
	}(); err != nil {
		return err
	}

	return nil
}
|
|
|
|
func (s *scanJob) incrementProgress(f scanFile) {
|
|
// don't increment for files inside zip files since these aren't
|
|
// counted during the initial walking
|
|
if s.ProgressReports != nil && f.ZipFile == nil {
|
|
s.ProgressReports.Increment()
|
|
}
|
|
}
|
|
|
|
func (s *scanJob) processQueueItem(ctx context.Context, f scanFile) {
|
|
s.ProgressReports.ExecuteTask("Scanning "+f.Path, func() {
|
|
var err error
|
|
if f.info.IsDir() {
|
|
err = s.handleFolder(ctx, f)
|
|
} else {
|
|
err = s.handleFile(ctx, f)
|
|
}
|
|
|
|
if err != nil && !errors.Is(err, context.Canceled) {
|
|
logger.Errorf("error processing %q: %v", f.Path, err)
|
|
}
|
|
})
|
|
}
|
|
|
|
// getFolderID returns the ID of the folder with the given path, or nil if it
// does not exist in the data store. Successful lookups are cached in
// folderPathToID; misses are not cached so a later pass can find the folder
// once it has been created.
func (s *scanJob) getFolderID(ctx context.Context, path string) (*models.FolderID, error) {
	// check the folder cache first
	if f, ok := s.folderPathToID.Load(path); ok {
		v := f.(models.FolderID)
		return &v, nil
	}

	// assume case sensitive when searching for the folder
	const caseSensitive = true

	ret, err := s.Repository.Folder.FindByPath(ctx, path, caseSensitive)
	if err != nil {
		return nil, err
	}

	if ret == nil {
		return nil, nil
	}

	s.folderPathToID.Store(path, ret.ID)
	return &ret.ID, nil
}
|
|
|
|
// getZipFileID returns the file ID for the given zip file, or nil if zipFile
// is nil. The ID is taken from the scanFile itself when already set, then
// from the zipPathToID cache, and finally from the data store - which must
// already contain the zip file entry.
func (s *scanJob) getZipFileID(ctx context.Context, zipFile *scanFile) (*models.FileID, error) {
	if zipFile == nil {
		return nil, nil
	}

	if zipFile.ID != 0 {
		return &zipFile.ID, nil
	}

	path := zipFile.Path

	// check the zip path cache first
	if f, ok := s.zipPathToID.Load(path); ok {
		v := f.(models.FileID)
		return &v, nil
	}

	// assume case sensitive when searching for the zip file
	const caseSensitive = true

	ret, err := s.Repository.File.FindByPath(ctx, path, caseSensitive)
	if err != nil {
		return nil, fmt.Errorf("getting zip file ID for %q: %w", path, err)
	}

	if ret == nil {
		return nil, fmt.Errorf("zip file %q doesn't exist in database", zipFile.Path)
	}

	s.zipPathToID.Store(path, ret.Base().ID)
	return &ret.Base().ID, nil
}
|
|
|
|
// handleFolder creates or updates the folder entry for file within a single
// transaction, and caches its path -> ID mapping for later lookups.
func (s *scanJob) handleFolder(ctx context.Context, file scanFile) error {
	path := file.Path

	return s.withTxn(ctx, func(ctx context.Context) error {
		defer s.incrementProgress(file)

		// determine if folder already exists in data store (by path)
		// assume case sensitive by default
		f, err := s.Repository.Folder.FindByPath(ctx, path, true)
		if err != nil {
			return fmt.Errorf("checking for existing folder %q: %w", path, err)
		}

		// #1426 / #6326 - if folder is in a case-insensitive filesystem, then try
		// case insensitive searching
		// assume case sensitive if in zip
		if f == nil && file.ZipFileID == nil {
			// error intentionally ignored: failures are treated as case sensitive
			caseSensitive, _ := file.fs.IsPathCaseSensitive(file.Path)

			if !caseSensitive {
				f, err = s.Repository.Folder.FindByPath(ctx, path, false)
				if err != nil {
					return fmt.Errorf("checking for existing folder %q: %w", path, err)
				}
			}
		}

		// if folder not exists, create it
		if f == nil {
			f, err = s.onNewFolder(ctx, file)
		} else {
			f, err = s.onExistingFolder(ctx, file, f)
		}

		if err != nil {
			return err
		}

		if f != nil {
			s.folderPathToID.Store(f.Path, f.ID)
		}

		return nil
	})
}
|
|
|
|
// onNewFolder handles a folder not present in the data store. If the folder
// is detected as a rename/move of an existing folder, the existing entry is
// updated and returned; otherwise a new folder entry is created, linked to
// its parent folder if one exists.
func (s *scanJob) onNewFolder(ctx context.Context, file scanFile) (*models.Folder, error) {
	renamed, err := s.handleFolderRename(ctx, file)
	if err != nil {
		return nil, err
	}

	if renamed != nil {
		return renamed, nil
	}

	now := time.Now()

	toCreate := &models.Folder{
		DirEntry:  file.DirEntry,
		Path:      file.Path,
		CreatedAt: now,
		UpdatedAt: now,
	}

	dir := filepath.Dir(file.Path)
	if dir != "." {
		parentFolderID, err := s.getFolderID(ctx, dir)
		if err != nil {
			return nil, fmt.Errorf("getting parent folder %q: %w", dir, err)
		}

		// if parent folder doesn't exist, assume it's a top-level folder
		// this may not be true if we're using multiple goroutines
		if parentFolderID != nil {
			toCreate.ParentFolderID = parentFolderID
		}
	}

	txn.AddPostCommitHook(ctx, func(ctx context.Context) {
		// log at the end so that if anything fails above due to a locked database
		// error and the transaction must be retried, then we shouldn't get multiple
		// logs of the same thing.
		logger.Infof("%s doesn't exist. Creating new folder entry...", file.Path)
	})

	if err := s.Repository.Folder.Create(ctx, toCreate); err != nil {
		return nil, fmt.Errorf("creating folder %q: %w", file.Path, err)
	}

	return toCreate, nil
}
|
|
|
|
// handleFolderRename detects whether file is a moved version of an existing
// folder entry. If so, the existing entry's path and parent are updated, its
// sub-folder hierarchy corrected, and the updated folder returned. Returns
// nil (and no error) when no move is detected or the folder is inside a zip.
func (s *scanJob) handleFolderRename(ctx context.Context, file scanFile) (*models.Folder, error) {
	// ignore folders in zip files
	if file.ZipFileID != nil {
		return nil, nil
	}

	// check if the folder was moved from elsewhere
	renamedFrom, err := s.detectFolderMove(ctx, file)
	if err != nil {
		return nil, fmt.Errorf("detecting folder move: %w", err)
	}

	if renamedFrom == nil {
		return nil, nil
	}

	// if the folder was moved, update the existing folder
	logger.Infof("%s moved to %s. Updating path...", renamedFrom.Path, file.Path)
	renamedFrom.Path = file.Path

	// update the parent folder ID
	// find the parent folder
	parentFolderID, err := s.getFolderID(ctx, filepath.Dir(file.Path))
	if err != nil {
		return nil, fmt.Errorf("getting parent folder for %q: %w", file.Path, err)
	}

	renamedFrom.ParentFolderID = parentFolderID

	if err := s.Repository.Folder.Update(ctx, renamedFrom); err != nil {
		return nil, fmt.Errorf("updating folder for rename %q: %w", renamedFrom.Path, err)
	}

	// #4146 - correct sub-folders to have the correct path
	if err := correctSubFolderHierarchy(ctx, s.Repository.Folder, renamedFrom); err != nil {
		return nil, fmt.Errorf("correcting sub folder hierarchy for %q: %w", renamedFrom.Path, err)
	}

	return renamedFrom, nil
}
|
|
|
|
func (s *scanJob) onExistingFolder(ctx context.Context, f scanFile, existing *models.Folder) (*models.Folder, error) {
|
|
update := false
|
|
|
|
// update if mod time is changed
|
|
entryModTime := f.ModTime
|
|
if !entryModTime.Equal(existing.ModTime) {
|
|
existing.Path = f.Path
|
|
existing.ModTime = entryModTime
|
|
update = true
|
|
}
|
|
|
|
// #6326 - update if path has changed - should only happen if case is
|
|
// changed and filesystem is case insensitive
|
|
if existing.Path != f.Path {
|
|
existing.Path = f.Path
|
|
update = true
|
|
}
|
|
|
|
// update if zip file ID has changed
|
|
fZfID := f.ZipFileID
|
|
existingZfID := existing.ZipFileID
|
|
if fZfID != existingZfID {
|
|
if fZfID == nil {
|
|
existing.ZipFileID = nil
|
|
update = true
|
|
} else if existingZfID == nil || *fZfID != *existingZfID {
|
|
existing.ZipFileID = fZfID
|
|
update = true
|
|
}
|
|
}
|
|
|
|
if update {
|
|
var err error
|
|
if err = s.Repository.Folder.Update(ctx, existing); err != nil {
|
|
return nil, fmt.Errorf("updating folder %q: %w", f.Path, err)
|
|
}
|
|
}
|
|
|
|
return existing, nil
|
|
}
|
|
|
|
// modTime returns the file's modification time truncated to whole seconds,
// since we don't store beyond that in the database.
func modTime(info fs.FileInfo) time.Time {
	t := info.ModTime()
	return t.Truncate(time.Second)
}
|
|
|
|
func (s *scanJob) handleFile(ctx context.Context, f scanFile) error {
|
|
defer s.incrementProgress(f)
|
|
|
|
var ff models.File
|
|
|
|
// don't use a transaction to check if new or existing
|
|
if err := s.withDB(ctx, func(ctx context.Context) error {
|
|
// determine if file already exists in data store
|
|
// assume case sensitive when searching for the file to begin with
|
|
var err error
|
|
ff, err = s.Repository.File.FindByPath(ctx, f.Path, true)
|
|
if err != nil {
|
|
return fmt.Errorf("checking for existing file %q: %w", f.Path, err)
|
|
}
|
|
|
|
// #1426 / #6326 - if file is in a case-insensitive filesystem, then try
|
|
// case insensitive search
|
|
// assume case sensitive if in zip
|
|
if ff == nil && f.ZipFileID != nil {
|
|
caseSensitive, _ := f.fs.IsPathCaseSensitive(f.Path)
|
|
|
|
if !caseSensitive {
|
|
ff, err = s.Repository.File.FindByPath(ctx, f.Path, false)
|
|
if err != nil {
|
|
return fmt.Errorf("checking for existing file %q: %w", f.Path, err)
|
|
}
|
|
}
|
|
}
|
|
|
|
if ff == nil {
|
|
// returns a file only if it is actually new
|
|
ff, err = s.onNewFile(ctx, f)
|
|
return err
|
|
}
|
|
|
|
ff, err = s.onExistingFile(ctx, f, ff)
|
|
return err
|
|
}); err != nil {
|
|
return err
|
|
}
|
|
|
|
if ff != nil && s.isZipFile(f.info.Name()) {
|
|
f.BaseFile = ff.Base()
|
|
|
|
// scan zip files with a different context that is not cancellable
|
|
// cancelling while scanning zip file contents results in the scan
|
|
// contents being partially completed
|
|
zipCtx := utils.ValueOnlyContext{Context: ctx}
|
|
|
|
if err := s.scanZipFile(zipCtx, f); err != nil {
|
|
logger.Errorf("Error scanning zip file %q: %v", f.Path, err)
|
|
}
|
|
}
|
|
|
|
return nil
|
|
}
|
|
|
|
func (s *scanJob) isZipFile(path string) bool {
|
|
fExt := filepath.Ext(path)
|
|
for _, ext := range s.options.ZipFileExtensions {
|
|
if strings.EqualFold(fExt, "."+ext) {
|
|
return true
|
|
}
|
|
}
|
|
|
|
return false
|
|
}
|
|
|
|
// onNewFile handles a file that has no entry in the data store. It calculates
// fingerprints, runs decorators, detects renames of existing entries, and
// otherwise creates a new file entry and fires handlers. Returns a file only
// if it is actually new; renamed and deferred files return nil.
func (s *scanJob) onNewFile(ctx context.Context, f scanFile) (models.File, error) {
	now := time.Now()

	baseFile := f.BaseFile
	path := baseFile.Path

	baseFile.CreatedAt = now
	baseFile.UpdatedAt = now

	// find the parent folder
	parentFolderID, err := s.getFolderID(ctx, filepath.Dir(path))
	if err != nil {
		return nil, fmt.Errorf("getting parent folder for %q: %w", path, err)
	}

	if parentFolderID == nil {
		// if parent folder doesn't exist, assume it's not yet created
		// add this file to the queue to be created later
		if s.retrying {
			// if we're retrying and the folder still doesn't exist, then it's a problem
			return nil, fmt.Errorf("parent folder for %q doesn't exist", path)
		}

		// NOTE(review): retryList is appended from multiple worker goroutines
		// without synchronization - confirm this is race-free.
		s.retryList = append(s.retryList, f)
		return nil, nil
	}

	baseFile.ParentFolderID = *parentFolderID

	const useExisting = false
	fp, err := s.calculateFingerprints(f.fs, baseFile, path, useExisting)
	if err != nil {
		return nil, err
	}

	baseFile.SetFingerprints(fp)

	file, err := s.fireDecorators(ctx, f.fs, baseFile)
	if err != nil {
		return nil, err
	}

	// determine if the file is renamed from an existing file in the store
	// do this after decoration so that missing fields can be populated
	renamed, err := s.handleRename(ctx, file, fp)
	if err != nil {
		return nil, err
	}

	if renamed != nil {
		// handle rename should have already handled the contents of the zip file
		// so shouldn't need to scan it again
		// return nil so it doesn't
		return nil, nil
	}

	// if not renamed, queue file for creation
	if err := s.withTxn(ctx, func(ctx context.Context) error {
		if err := s.Repository.File.Create(ctx, file); err != nil {
			return fmt.Errorf("creating file %q: %w", path, err)
		}

		if err := s.fireHandlers(ctx, file, nil); err != nil {
			return err
		}

		return nil
	}); err != nil {
		return nil, err
	}

	return file, nil
}
|
|
|
|
func (s *scanJob) fireDecorators(ctx context.Context, fs models.FS, f models.File) (models.File, error) {
|
|
for _, h := range s.FileDecorators {
|
|
var err error
|
|
f, err = h.Decorate(ctx, fs, f)
|
|
if err != nil {
|
|
return f, err
|
|
}
|
|
}
|
|
|
|
return f, nil
|
|
}
|
|
|
|
func (s *scanJob) fireHandlers(ctx context.Context, f models.File, oldFile models.File) error {
|
|
for _, h := range s.handlers {
|
|
if err := h.Handle(ctx, f, oldFile); err != nil {
|
|
return err
|
|
}
|
|
}
|
|
|
|
return nil
|
|
}
|
|
|
|
func (s *scanJob) calculateFingerprints(fs models.FS, f *models.BaseFile, path string, useExisting bool) (models.Fingerprints, error) {
|
|
// only log if we're (re)calculating fingerprints
|
|
if !useExisting {
|
|
logger.Infof("Calculating fingerprints for %s ...", path)
|
|
}
|
|
|
|
// calculate primary fingerprint for the file
|
|
fp, err := s.FingerprintCalculator.CalculateFingerprints(f, &fsOpener{
|
|
fs: fs,
|
|
name: path,
|
|
}, useExisting)
|
|
if err != nil {
|
|
return nil, fmt.Errorf("calculating fingerprint for file %q: %w", path, err)
|
|
}
|
|
|
|
return fp, nil
|
|
}
|
|
|
|
func appendFileUnique(v []models.File, toAdd []models.File) []models.File {
|
|
for _, f := range toAdd {
|
|
found := false
|
|
id := f.Base().ID
|
|
for _, vv := range v {
|
|
if vv.Base().ID == id {
|
|
found = true
|
|
break
|
|
}
|
|
}
|
|
|
|
if !found {
|
|
v = append(v, f)
|
|
}
|
|
}
|
|
|
|
return v
|
|
}
|
|
|
|
// getFileFS returns the file system that contains f: the scanner's FS for
// regular files, or the opened zip file system (recursively, for nested
// zips) for files inside a zip.
// NOTE(review): the zip FS opened here is not closed by this function's
// callers - confirm resources are released elsewhere.
func (s *scanJob) getFileFS(f *models.BaseFile) (models.FS, error) {
	if f.ZipFile == nil {
		return s.FS, nil
	}

	fs, err := s.getFileFS(f.ZipFile.Base())
	if err != nil {
		return nil, err
	}

	zipPath := f.ZipFile.Base().Path
	return fs.OpenZip(zipPath, f.Size)
}
|
|
|
|
// handleRename determines whether f is a renamed/moved version of an existing
// file entry by matching fingerprints and checking which matching entries are
// missing from the file system. If a missing candidate is found, that
// existing entry is updated in place to the new path (preserving its ID,
// creation time and fingerprints) and the updated file is returned.
// Returns nil when no rename is detected, meaning f should be treated as new.
func (s *scanJob) handleRename(ctx context.Context, f models.File, fp []models.Fingerprint) (models.File, error) {
	var others []models.File

	// collect all stored files sharing any of the new file's fingerprints
	for _, tfp := range fp {
		thisOthers, err := s.Repository.File.FindByFingerprint(ctx, tfp)
		if err != nil {
			return nil, fmt.Errorf("getting files by fingerprint %v: %w", tfp, err)
		}

		others = appendFileUnique(others, thisOthers)
	}

	var missing []models.File

	fZipID := f.Base().ZipFileID
	for _, other := range others {
		// if file is from a zip file, then only rename if both files are from the same zip file
		otherZipID := other.Base().ZipFileID
		if otherZipID != nil && (fZipID == nil || *otherZipID != *fZipID) {
			continue
		}

		// if file does not exist, then update it to the new path
		fs, err := s.getFileFS(other.Base())
		if err != nil {
			missing = append(missing, other)
			continue
		}

		info, err := fs.Lstat(other.Base().Path)
		switch {
		case err != nil:
			missing = append(missing, other)
		case strings.EqualFold(f.Base().Path, other.Base().Path):
			// #1426 - if file exists but is a case-insensitive match for the
			// original filename, and the filesystem is case-insensitive
			// then treat it as a move
			// #6326 - this should now be handled earlier, and this shouldn't be necessary
			if caseSensitive, _ := fs.IsPathCaseSensitive(other.Base().Path); !caseSensitive {
				// treat as a move
				missing = append(missing, other)
			}
		case !s.acceptEntry(ctx, other.Base().Path, info):
			// #4393 - if the file is no longer in the configured library paths, treat it as a move
			logger.Debugf("File %q no longer in library paths. Treating as a move.", other.Base().Path)
			missing = append(missing, other)
		}
	}

	n := len(missing)
	if n == 0 {
		// no missing files, not a rename
		return nil, nil
	}

	// assume does not exist, update existing file
	// it's possible that there may be multiple missing files.
	// just use the first one to rename.
	// #4775 - using the new file instance means that any changes made to the existing
	// file will be lost. Update the existing file instead.
	other := missing[0]
	updated := other.Clone()
	updatedBase := updated.Base()

	fBaseCopy := *(f.Base())

	oldPath := updatedBase.Path
	newPath := fBaseCopy.Path

	logger.Infof("%s moved to %s. Updating path...", oldPath, newPath)
	// keep the existing entry's identity, creation time and fingerprints,
	// taking everything else from the newly-scanned file
	fBaseCopy.ID = updatedBase.ID
	fBaseCopy.CreatedAt = updatedBase.CreatedAt
	fBaseCopy.Fingerprints = updatedBase.Fingerprints
	*updatedBase = fBaseCopy

	if err := s.withTxn(ctx, func(ctx context.Context) error {
		if err := s.Repository.File.Update(ctx, updated); err != nil {
			return fmt.Errorf("updating file for rename %q: %w", newPath, err)
		}

		if s.isZipFile(updatedBase.Basename) {
			// moved zip: re-parent the folders/files that were inside it
			if err := transferZipHierarchy(ctx, s.Repository.Folder, s.Repository.File, updatedBase.ID, oldPath, newPath); err != nil {
				return fmt.Errorf("moving zip hierarchy for renamed zip file %q: %w", newPath, err)
			}
		}

		if err := s.fireHandlers(ctx, updated, other); err != nil {
			return err
		}

		return nil
	}); err != nil {
		return nil, err
	}

	return updated, nil
}
|
|
|
|
func (s *scanJob) isHandlerRequired(ctx context.Context, f models.File) bool {
|
|
accept := len(s.options.HandlerRequiredFilters) == 0
|
|
for _, filter := range s.options.HandlerRequiredFilters {
|
|
// accept if any filter accepts the file
|
|
if filter.Accept(ctx, f) {
|
|
accept = true
|
|
break
|
|
}
|
|
}
|
|
|
|
return accept
|
|
}
|
|
|
|
// isMissingMetadata returns true if the provided file is missing metadata.
|
|
// Missing metadata should only occur after the 32 schema migration.
|
|
// Looks for special values. For numbers, this will be -1. For strings, this
|
|
// will be 'unset'.
|
|
// Missing metadata includes the following:
|
|
// - file size
|
|
// - image format, width or height
|
|
// - video codec, audio codec, format, width, height, framerate or bitrate
|
|
func (s *scanJob) isMissingMetadata(ctx context.Context, f scanFile, existing models.File) bool {
|
|
for _, h := range s.FileDecorators {
|
|
if h.IsMissingMetadata(ctx, f.fs, existing) {
|
|
return true
|
|
}
|
|
}
|
|
|
|
return false
|
|
}
|
|
|
|
// setMissingMetadata repopulates missing metadata for existing by refreshing
// its size from disk and re-running the decorators, then persists the updated
// file in a transaction.
func (s *scanJob) setMissingMetadata(ctx context.Context, f scanFile, existing models.File) (models.File, error) {
	path := existing.Base().Path
	logger.Infof("Updating metadata for %s", path)

	existing.Base().Size = f.Size

	var err error
	existing, err = s.fireDecorators(ctx, f.fs, existing)
	if err != nil {
		return nil, err
	}

	// queue file for update
	if err := s.withTxn(ctx, func(ctx context.Context) error {
		if err := s.Repository.File.Update(ctx, existing); err != nil {
			return fmt.Errorf("updating file %q: %w", path, err)
		}

		return nil
	}); err != nil {
		return nil, err
	}

	return existing, nil
}
|
|
|
|
// setMissingFingerprints calculates any missing fingerprints for existing,
// reusing fingerprint values already present, and persists the file only if
// the fingerprint contents actually changed.
func (s *scanJob) setMissingFingerprints(ctx context.Context, f scanFile, existing models.File) (models.File, error) {
	const useExisting = true
	fp, err := s.calculateFingerprints(f.fs, existing.Base(), f.Path, useExisting)
	if err != nil {
		return nil, err
	}

	if fp.ContentsChanged(existing.Base().Fingerprints) {
		existing.SetFingerprints(fp)

		if err := s.withTxn(ctx, func(ctx context.Context) error {
			if err := s.Repository.File.Update(ctx, existing); err != nil {
				return fmt.Errorf("updating file %q: %w", f.Path, err)
			}

			return nil
		}); err != nil {
			return nil, err
		}
	}

	return existing, nil
}
|
|
|
|
// onExistingFile handles a file that already has an entry in the data store.
// Returns a file only if it was updated. Unchanged files (same mod time and
// basename, unless a rescan is forced) are delegated to onUnchangedFile;
// otherwise fingerprints are recalculated, decorators re-run, and the entry
// and handlers updated in a transaction.
func (s *scanJob) onExistingFile(ctx context.Context, f scanFile, existing models.File) (models.File, error) {
	base := existing.Base()
	path := base.Path

	fileModTime := f.ModTime
	// #6326 - also force a rescan if the basename changed
	updated := !fileModTime.Equal(base.ModTime) || base.Basename != f.Basename
	forceRescan := s.options.Rescan

	if !updated && !forceRescan {
		return s.onUnchangedFile(ctx, f, existing)
	}

	// keep a copy of the pre-update state to pass to the handlers
	oldBase := *base

	if !updated && forceRescan {
		logger.Infof("rescanning %s", path)
	} else {
		logger.Infof("%s has been updated: rescanning", path)
	}

	// #6326 - update basename in case it changed
	base.Basename = f.Basename
	base.ModTime = fileModTime
	base.Size = f.Size
	base.UpdatedAt = time.Now()

	// calculate and update fingerprints for the file
	const useExisting = false
	fp, err := s.calculateFingerprints(f.fs, base, path, useExisting)
	if err != nil {
		return nil, err
	}

	s.removeOutdatedFingerprints(existing, fp)
	existing.SetFingerprints(fp)

	existing, err = s.fireDecorators(ctx, f.fs, existing)
	if err != nil {
		return nil, err
	}

	// queue file for update
	if err := s.withTxn(ctx, func(ctx context.Context) error {
		if err := s.Repository.File.Update(ctx, existing); err != nil {
			return fmt.Errorf("updating file %q: %w", path, err)
		}

		if err := s.fireHandlers(ctx, existing, &oldBase); err != nil {
			return err
		}

		return nil
	}); err != nil {
		return nil, err
	}

	return existing, nil
}
|
|
|
|
func (s *scanJob) removeOutdatedFingerprints(existing models.File, fp models.Fingerprints) {
|
|
// HACK - if no MD5 fingerprint was returned, and the oshash is changed
|
|
// then remove the MD5 fingerprint
|
|
oshash := fp.For(models.FingerprintTypeOshash)
|
|
if oshash == nil {
|
|
return
|
|
}
|
|
|
|
existingOshash := existing.Base().Fingerprints.For(models.FingerprintTypeOshash)
|
|
if existingOshash == nil || *existingOshash == *oshash {
|
|
// missing oshash or same oshash - nothing to do
|
|
return
|
|
}
|
|
|
|
md5 := fp.For(models.FingerprintTypeMD5)
|
|
|
|
if md5 != nil {
|
|
// nothing to do
|
|
return
|
|
}
|
|
|
|
// oshash has changed, MD5 is missing - remove MD5 from the existing fingerprints
|
|
logger.Infof("Removing outdated checksum from %s", existing.Base().Path)
|
|
b := existing.Base()
|
|
b.Fingerprints = b.Fingerprints.Remove(models.FingerprintTypeMD5)
|
|
}
|
|
|
|
// returns a file only if it was updated
|
|
func (s *scanJob) onUnchangedFile(ctx context.Context, f scanFile, existing models.File) (models.File, error) {
|
|
var err error
|
|
|
|
isMissingMetdata := s.isMissingMetadata(ctx, f, existing)
|
|
// set missing information
|
|
if isMissingMetdata {
|
|
existing, err = s.setMissingMetadata(ctx, f, existing)
|
|
if err != nil {
|
|
return nil, err
|
|
}
|
|
}
|
|
|
|
// calculate missing fingerprints
|
|
existing, err = s.setMissingFingerprints(ctx, f, existing)
|
|
if err != nil {
|
|
return nil, err
|
|
}
|
|
|
|
handlerRequired := false
|
|
if err := s.withDB(ctx, func(ctx context.Context) error {
|
|
// check if the handler needs to be run
|
|
handlerRequired = s.isHandlerRequired(ctx, existing)
|
|
return nil
|
|
}); err != nil {
|
|
return nil, err
|
|
}
|
|
|
|
if !handlerRequired {
|
|
// if this file is a zip file, then we need to rescan the contents
|
|
// as well. We do this by returning the file, instead of nil.
|
|
if isMissingMetdata {
|
|
return existing, nil
|
|
}
|
|
|
|
return nil, nil
|
|
}
|
|
|
|
if err := s.withTxn(ctx, func(ctx context.Context) error {
|
|
if err := s.fireHandlers(ctx, existing, nil); err != nil {
|
|
return err
|
|
}
|
|
|
|
return nil
|
|
}); err != nil {
|
|
return nil, err
|
|
}
|
|
|
|
// if this file is a zip file, then we need to rescan the contents
|
|
// as well. We do this by returning the file, instead of nil.
|
|
return existing, nil
|
|
}
|