Add option to ignore zip contents during clean (#6700)

* Add option to ignore zip file contents while cleaning

Speeds up the clean process with the assumption that files within zip files are not deleted.

* Add UI for new option
This commit is contained in:
WithoutPants 2026-03-18 15:58:32 +11:00 committed by GitHub
parent f7b66c7ff9
commit 93fbb4be80
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
12 changed files with 61 additions and 23 deletions

View file

@ -131,6 +131,14 @@ type ScanMetadataOptions {
input CleanMetadataInput {
paths: [String!]
"""
Don't check zip file contents when determining whether to clean a file.
This can significantly speed up the clean process, but will potentially miss removed files within zip files.
Where users do not modify zip file contents directly, this should be safe to use.
Defaults to false.
"""
ignoreZipFileContents: Boolean
"Do a dry run. Don't delete any files"
dryRun: Boolean!
}

View file

@ -314,6 +314,8 @@ type CleanMetadataInput struct {
Paths []string `json:"paths"`
// Do a dry run. Don't delete any files
DryRun bool `json:"dryRun"`
IgnoreZipFileContents bool `json:"ignoreZipFileContents"`
}
func (s *Manager) Clean(ctx context.Context, input CleanMetadataInput) int {

View file

@ -40,9 +40,10 @@ func (j *cleanJob) Execute(ctx context.Context, progress *job.Progress) error {
}
j.cleaner.Clean(ctx, file.CleanOptions{
Paths: j.input.Paths,
DryRun: j.input.DryRun,
PathFilter: newCleanFilter(instance.Config),
Paths: j.input.Paths,
DryRun: j.input.DryRun,
IgnoreZipFileContents: j.input.IgnoreZipFileContents,
PathFilter: newCleanFilter(instance.Config),
}, progress)
if job.IsCancelled(ctx) {

View file

@ -33,6 +33,11 @@ type cleanJob struct {
type CleanOptions struct {
Paths []string
// IgnoreZipFileContents will skip checking the contents of zip files when determining whether to clean a file.
// This can significantly speed up the clean process, but will potentially miss removed files within zip files.
// Where users do not modify zip file contents directly, this should be safe to use.
IgnoreZipFileContents bool
// Do a dry run. Don't delete any files
DryRun bool
@ -174,13 +179,16 @@ func (j *cleanJob) assessFiles(ctx context.Context, toDelete *deleteSet) error {
more := true
r := j.Repository
includeZipContents := !j.options.IgnoreZipFileContents
if err := r.WithReadTxn(ctx, func(ctx context.Context) error {
for more {
if job.IsCancelled(ctx) {
return nil
}
files, err := r.File.FindAllInPaths(ctx, j.options.Paths, batchSize, offset)
files, err := r.File.FindAllInPaths(ctx, j.options.Paths, includeZipContents, batchSize, offset)
if err != nil {
return fmt.Errorf("error querying for files: %w", err)
}
@ -258,6 +266,8 @@ func (j *cleanJob) assessFolders(ctx context.Context, toDelete *deleteSet) error
offset := 0
progress := j.progress
includeZipContents := !j.options.IgnoreZipFileContents
more := true
r := j.Repository
if err := r.WithReadTxn(ctx, func(ctx context.Context) error {
@ -266,7 +276,7 @@ func (j *cleanJob) assessFolders(ctx context.Context, toDelete *deleteSet) error
return nil
}
folders, err := r.Folder.FindAllInPaths(ctx, j.options.Paths, batchSize, offset)
folders, err := r.Folder.FindAllInPaths(ctx, j.options.Paths, includeZipContents, batchSize, offset)
if err != nil {
return fmt.Errorf("error querying for folders: %w", err)
}

View file

@ -153,13 +153,13 @@ func (_m *FileReaderWriter) FindAllByPath(ctx context.Context, path string, case
return r0, r1
}
// FindAllInPaths provides a mock function with given fields: ctx, p, limit, offset
func (_m *FileReaderWriter) FindAllInPaths(ctx context.Context, p []string, limit int, offset int) ([]models.File, error) {
ret := _m.Called(ctx, p, limit, offset)
// FindAllInPaths provides a mock function with given fields: ctx, p, includeZipContents, limit, offset
func (_m *FileReaderWriter) FindAllInPaths(ctx context.Context, p []string, includeZipContents bool, limit int, offset int) ([]models.File, error) {
ret := _m.Called(ctx, p, includeZipContents, limit, offset)
var r0 []models.File
if rf, ok := ret.Get(0).(func(context.Context, []string, int, int) []models.File); ok {
r0 = rf(ctx, p, limit, offset)
if rf, ok := ret.Get(0).(func(context.Context, []string, bool, int, int) []models.File); ok {
r0 = rf(ctx, p, includeZipContents, limit, offset)
} else {
if ret.Get(0) != nil {
r0 = ret.Get(0).([]models.File)
@ -167,8 +167,8 @@ func (_m *FileReaderWriter) FindAllInPaths(ctx context.Context, p []string, limi
}
var r1 error
if rf, ok := ret.Get(1).(func(context.Context, []string, int, int) error); ok {
r1 = rf(ctx, p, limit, offset)
if rf, ok := ret.Get(1).(func(context.Context, []string, bool, int, int) error); ok {
r1 = rf(ctx, p, includeZipContents, limit, offset)
} else {
r1 = ret.Error(1)
}

View file

@ -86,13 +86,13 @@ func (_m *FolderReaderWriter) Find(ctx context.Context, id models.FolderID) (*mo
return r0, r1
}
// FindAllInPaths provides a mock function with given fields: ctx, p, limit, offset
func (_m *FolderReaderWriter) FindAllInPaths(ctx context.Context, p []string, limit int, offset int) ([]*models.Folder, error) {
ret := _m.Called(ctx, p, limit, offset)
// FindAllInPaths provides a mock function with given fields: ctx, p, includeZipContents, limit, offset
func (_m *FolderReaderWriter) FindAllInPaths(ctx context.Context, p []string, includeZipContents bool, limit int, offset int) ([]*models.Folder, error) {
ret := _m.Called(ctx, p, includeZipContents, limit, offset)
var r0 []*models.Folder
if rf, ok := ret.Get(0).(func(context.Context, []string, int, int) []*models.Folder); ok {
r0 = rf(ctx, p, limit, offset)
if rf, ok := ret.Get(0).(func(context.Context, []string, bool, int, int) []*models.Folder); ok {
r0 = rf(ctx, p, includeZipContents, limit, offset)
} else {
if ret.Get(0) != nil {
r0 = ret.Get(0).([]*models.Folder)
@ -100,8 +100,8 @@ func (_m *FolderReaderWriter) FindAllInPaths(ctx context.Context, p []string, li
}
var r1 error
if rf, ok := ret.Get(1).(func(context.Context, []string, int, int) error); ok {
r1 = rf(ctx, p, limit, offset)
if rf, ok := ret.Get(1).(func(context.Context, []string, bool, int, int) error); ok {
r1 = rf(ctx, p, includeZipContents, limit, offset)
} else {
r1 = ret.Error(1)
}

View file

@ -14,7 +14,7 @@ type FileGetter interface {
type FileFinder interface {
FileGetter
FindAllByPath(ctx context.Context, path string, caseSensitive bool) ([]File, error)
FindAllInPaths(ctx context.Context, p []string, limit, offset int) ([]File, error)
FindAllInPaths(ctx context.Context, p []string, includeZipContents bool, limit, offset int) ([]File, error)
FindByPath(ctx context.Context, path string, caseSensitive bool) (File, error)
FindByFingerprint(ctx context.Context, fp Fingerprint) ([]File, error)
FindByZipFileID(ctx context.Context, zipFileID FileID) ([]File, error)

View file

@ -11,7 +11,7 @@ type FolderGetter interface {
// FolderFinder provides methods to find folders.
type FolderFinder interface {
FolderGetter
FindAllInPaths(ctx context.Context, p []string, limit, offset int) ([]*Folder, error)
FindAllInPaths(ctx context.Context, p []string, includeZipContents bool, limit, offset int) ([]*Folder, error)
FindByPath(ctx context.Context, path string, caseSensitive bool) (*Folder, error)
FindByZipFileID(ctx context.Context, zipFileID FileID) ([]*Folder, error)
FindByParentFolderID(ctx context.Context, parentFolderID FolderID) ([]*Folder, error)

View file

@ -695,7 +695,7 @@ func (qb *FileStore) allInPaths(q *goqu.SelectDataset, p []string) *goqu.SelectD
// FindAllInPaths returns all files that are within any of the given paths.
// Returns all if limit is < 0.
// Returns all files if p is empty.
func (qb *FileStore) FindAllInPaths(ctx context.Context, p []string, limit, offset int) ([]models.File, error) {
func (qb *FileStore) FindAllInPaths(ctx context.Context, p []string, includeZipContents bool, limit, offset int) ([]models.File, error) {
table := qb.table()
folderTable := folderTableMgr.table
@ -706,6 +706,10 @@ func (qb *FileStore) FindAllInPaths(ctx context.Context, p []string, limit, offs
q = qb.allInPaths(q, p)
if !includeZipContents {
q = q.Where(table.Col("zip_file_id").IsNull())
}
if limit > -1 {
q = q.Limit(uint(limit))
}

View file

@ -427,10 +427,14 @@ func (qb *FolderStore) allInPaths(q *goqu.SelectDataset, p []string) *goqu.Selec
// FindAllInPaths returns all folders that are, or are within, any of the given paths.
// Returns all if limit is < 0.
// Returns all folders if p is empty.
func (qb *FolderStore) FindAllInPaths(ctx context.Context, p []string, limit, offset int) ([]*models.Folder, error) {
func (qb *FolderStore) FindAllInPaths(ctx context.Context, p []string, includeZipContents bool, limit, offset int) ([]*models.Folder, error) {
q := qb.selectDataset().Prepared(true)
q = qb.allInPaths(q, p)
if !includeZipContents {
q = q.Where(qb.table().Col("zip_file_id").IsNull())
}
if limit > -1 {
q = q.Limit(uint(limit))
}

View file

@ -145,6 +145,13 @@ const CleanOptions: React.FC<ICleanOptions> = ({
return (
<>
<BooleanSetting
id="clean-ignore-zip-contents"
checked={options.ignoreZipFileContents ?? false}
headingID="config.tasks.clean_ignore_zip_contents"
subHeadingID="config.tasks.clean_ignore_zip_contents_desc"
onChange={(v) => setOptions({ ignoreZipFileContents: v })}
/>
<BooleanSetting
id="clean-dryrun"
checked={options.dryRun}

View file

@ -527,6 +527,8 @@
"zip": "SQLite database file and blob files will be zipped into a single file, with the filename {filename_format}"
},
"cleanup_desc": "Check for missing files and remove them from the database. This is a destructive action.",
"clean_ignore_zip_contents": "Ignore zip file contents",
"clean_ignore_zip_contents_desc": "Faster but will miss files removed inside zip files. Safe to enable if you don't delete files within zips.",
"clean_generated": {
"blob_files": "Blob files",
"description": "Removes generated files without a corresponding database entry.",