diff --git a/server/common/plugin.go b/server/common/plugin.go
index f09375d5..25d82347 100644
--- a/server/common/plugin.go
+++ b/server/common/plugin.go
@@ -22,9 +22,14 @@ var Hooks = struct {
 }{
     Get:      Get{},
     Register: Register{},
-    All:      All{},
 }
 
+/*
+ * ProcessFileContentBeforeSend is a processing hook used in plugins like:
+ * 1. the pluggable image transcoding services: plg_image_light, plg_image_bimg, plg_image_golang
+ * 2. the video transcoding service: plg_video_transcode
+ * 3. disallowing certain types of files: plg_security_svg
+ */
 var process_file_content_before_send []func(io.ReadCloser, *App, *http.ResponseWriter, *http.Request) (io.ReadCloser, error)
 
 func (this Register) ProcessFileContentBeforeSend(fn func(io.ReadCloser, *App, *http.ResponseWriter, *http.Request) (io.ReadCloser, error)) {
@@ -34,6 +39,14 @@ func (this Get) ProcessFileContentBeforeSend() []func(io.ReadCloser, *App, *http
     return process_file_content_before_send
 }
 
+/*
+ * HttpEndpoint is a hook that makes it possible to register new endpoints in the application.
+ * It is used in plugins like:
+ * 1. plg_video_transcoder to serve the transcoded video segments via HLS
+ * 2. plg_editor_onlyoffice to serve the content for a custom type in an iframe
+ * 3. plg_handler_syncthing to create a better integration with syncthing
+ * 4. plg_handler_console to serve a full-blown console for debugging the application
+ */
 var http_endpoint []func(*mux.Router, *App) error
 
 func (this Register) HttpEndpoint(fn func(*mux.Router, *App) error) {
@@ -43,6 +56,14 @@ func (this Get) HttpEndpoint() []func(*mux.Router, *App) error {
     return http_endpoint
 }
 
+/*
+ * Starter is what lets us serve the application in a wide variety of ways:
+ * - plg_starter_http, the default, which serves the application on port 8334
+ * - plg_starter_tor to serve the application via tor
+ * - plg_starter_web which creates ssl certificates via letsencrypt
+ * - plg_starter_http2 to create an HTTP2 server
+ * - ...
+ */
 var starter_process []func(*mux.Router)
 
 func (this Register) Starter(fn func(*mux.Router)) {
@@ -52,16 +73,55 @@ func (this Get) Starter() []func(*mux.Router) {
     return starter_process
 }
 
+/*
+ * AuthenticationMiddleware is what enables us to authenticate users via different means:
+ * - plg_authentication_admin to enable connection as an admin
+ * - plg_authentication_saml
+ * - plg_authentication_openid
+ * - plg_authentication_ldap
+ * - ...
+ */
 var authentication_middleware map[string]IAuth = make(map[string]IAuth, 0)
 
 func (this Register) AuthenticationMiddleware(id string, am IAuth) {
     authentication_middleware[id] = am
 }
 
-func (this All) AuthenticationMiddleware() map[string]IAuth {
+func (this Get) AuthenticationMiddleware() map[string]IAuth {
     return authentication_middleware
 }
 
+/*
+ * AuthorisationMiddleware enables custom authorisation rules. eg: anonymous users can see,
+ * registered users can see/edit some files but not others, admins can do everything
+ */
+var authorisation_middleware []IAuthorisation
+
+func (this Register) AuthorisationMiddleware(a IAuthorisation) {
+    authorisation_middleware = append(authorisation_middleware, a)
+}
+
+func (this Get) AuthorisationMiddleware() []IAuthorisation {
+    return authorisation_middleware
+}
+
+/*
+ * Search is the pluggable search mechanism. By default, there are 2 options:
+ * - plg_search_stateless which does stateless search based on filenames only
+ * - plg_search_sqlitefts which does full text search with a sqlite data store
+ * The idea here is to enable different types of usage, like leveraging elastic search or solr
+ * with custom logic around them
+ */
+var search ISearch
+
+func (this Register) SearchEngine(s ISearch) {
+    search = s
+}
+
+func (this Get) SearchEngine() ISearch {
+    return search
+}
+
 /*
  * UI Overrides
  * They are the means by which server plugin change the frontend behaviors.
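
For context, a minimal sketch of how a third-party plugin could register against the hooks declared above. The plg_example package and its handlers are made up for illustration; only the signatures come from plugin.go, and the mux import assumes the gorilla/mux router behind the *mux.Router parameter:

```go
package plg_example

import (
	"io"
	"net/http"

	"github.com/gorilla/mux"
	. "github.com/mickael-kerjean/filestash/server/common"
)

func init() {
	// called on every download: a real plugin would return a wrapped reader
	Hooks.Register.ProcessFileContentBeforeSend(func(r io.ReadCloser, ctx *App, res *http.ResponseWriter, req *http.Request) (io.ReadCloser, error) {
		return r, nil // passthrough
	})
	// expose a new endpoint on the application router
	Hooks.Register.HttpEndpoint(func(router *mux.Router, ctx *App) error {
		router.HandleFunc("/api/example", func(w http.ResponseWriter, r *http.Request) {
			w.Write([]byte("ok"))
		})
		return nil
	})
}
```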
diff --git a/server/common/types.go b/server/common/types.go
index 5e3f65c5..060d4a7a 100644
--- a/server/common/types.go
+++ b/server/common/types.go
@@ -26,6 +26,25 @@ type IAuth interface {
     Callback(formData map[string]string, idpParams map[string]string, res http.ResponseWriter) (map[string]string, error)
 }
 
+type IAuthorisation interface {
+    Ls(ctx App, path string) error
+    Cat(ctx App, path string) error
+    Mkdir(ctx App, path string) error
+    Rm(ctx App, path string) error
+    Mv(ctx App, from string, to string) error
+    Save(ctx App, path string) error
+    Touch(ctx App, path string) error
+}
+
+type IFile interface {
+    os.FileInfo
+    Path() string
+}
+
+type ISearch interface {
+    Query(ctx App, basePath string, term string) ([]IFile, error)
+}
+
 type File struct {
     FName string `json:"name"`
     FType string `json:"type"`
@@ -65,6 +84,10 @@ func (f File) Sys() interface{} {
     return nil
 }
 
+func (f File) Path() string {
+    return f.FPath
+}
+
 type Metadata struct {
     CanSee        *bool `json:"can_read,omitempty"`
     CanCreateFile *bool `json:"can_create_file,omitempty"`
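
For illustration, a minimal IAuthorisation implementation of the kind the controllers below consult on every call. This read-only policy is hypothetical; it is built only from the interface above and the ErrNotAuthorized error already used in server/ctrl/files.go:

```go
package plg_authorisation_readonly

import (
	. "github.com/mickael-kerjean/filestash/server/common"
)

// hypothetical policy: anyone can browse and read, nobody can write
type ReadOnly struct{}

func (this ReadOnly) Ls(ctx App, path string) error            { return nil }
func (this ReadOnly) Cat(ctx App, path string) error           { return nil }
func (this ReadOnly) Mkdir(ctx App, path string) error         { return ErrNotAuthorized }
func (this ReadOnly) Rm(ctx App, path string) error            { return ErrNotAuthorized }
func (this ReadOnly) Mv(ctx App, from string, to string) error { return ErrNotAuthorized }
func (this ReadOnly) Save(ctx App, path string) error          { return ErrNotAuthorized }
func (this ReadOnly) Touch(ctx App, path string) error         { return ErrNotAuthorized }

func init() {
	Hooks.Register.AuthorisationMiddleware(ReadOnly{})
}
```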
diff --git a/server/ctrl/admin.go b/server/ctrl/admin.go
index 78242828..9a396a4a 100644
--- a/server/ctrl/admin.go
+++ b/server/ctrl/admin.go
@@ -86,7 +86,7 @@ func AdminBackend(ctx App, res http.ResponseWriter, req *http.Request) {
 }
 
 func AdminAuthenticationMiddleware(ctx App, res http.ResponseWriter, req *http.Request) {
-    drivers := Hooks.All.AuthenticationMiddleware()
+    drivers := Hooks.Get.AuthenticationMiddleware()
     middlewares := make(map[string]Form, len(drivers))
     for id, driver := range drivers {
         middlewares[id] = driver.Setup()
diff --git a/server/ctrl/files.go b/server/ctrl/files.go
index 4f58bcad..3846b142 100644
--- a/server/ctrl/files.go
+++ b/server/ctrl/files.go
@@ -64,6 +64,13 @@ func FileLs(ctx App, res http.ResponseWriter, req *http.Request) {
         SendErrorResult(res, err)
         return
     }
+    for _, auth := range Hooks.Get.AuthorisationMiddleware() {
+        if err = auth.Ls(ctx, path); err != nil {
+            Log.Info("ls::auth '%s'", err.Error())
+            SendErrorResult(res, ErrNotAuthorized)
+            return
+        }
+    }
 
     entries, err := ctx.Backend.Ls(path)
     if err != nil {
@@ -71,7 +78,6 @@ func FileLs(ctx App, res http.ResponseWriter, req *http.Request) {
         SendErrorResult(res, err)
         return
     }
-    go model.SProc.HintLs(&ctx, path)
 
     files := make([]FileInfo, len(entries))
     etagger := fnv.New32()
@@ -148,6 +154,14 @@ func FileCat(ctx App, res http.ResponseWriter, req *http.Request) {
         return
     }
 
+    for _, auth := range Hooks.Get.AuthorisationMiddleware() {
+        if err = auth.Cat(ctx, path); err != nil {
+            Log.Info("cat::auth '%s'", err.Error())
+            SendErrorResult(res, ErrNotAuthorized)
+            return
+        }
+    }
+
     var file io.ReadCloser
     var contentLength int64 = -1
     var needToCreateCache bool = false
@@ -177,7 +191,6 @@ func FileCat(ctx App, res http.ResponseWriter, req *http.Request) {
         if req.Header.Get("range") != "" {
             needToCreateCache = true
         }
-        go model.SProc.HintLs(&ctx, filepath.Dir(path)+"/")
     }
 
     // plugin hooks
@@ -350,6 +363,14 @@ func FileSave(ctx App, res http.ResponseWriter, req *http.Request) {
         }
     }
 
+    for _, auth := range Hooks.Get.AuthorisationMiddleware() {
+        if err = auth.Save(ctx, path); err != nil {
+            Log.Info("save::auth '%s'", err.Error())
+            SendErrorResult(res, ErrNotAuthorized)
+            return
+        }
+    }
+
     err = ctx.Backend.Save(path, req.Body)
     req.Body.Close()
     if err != nil {
@@ -357,8 +378,6 @@ func FileSave(ctx App, res http.ResponseWriter, req *http.Request) {
         SendErrorResult(res, NewError(err.Error(), 403))
         return
     }
-    go model.SProc.HintLs(&ctx, filepath.Dir(path)+"/")
-    go model.SProc.HintFile(&ctx, path)
 
     SendSuccessResult(res, nil)
 }
@@ -387,15 +406,20 @@ func FileMv(ctx App, res http.ResponseWriter, req *http.Request) {
         return
     }
 
+    for _, auth := range Hooks.Get.AuthorisationMiddleware() {
+        if err = auth.Mv(ctx, from, to); err != nil {
+            Log.Info("mv::auth '%s'", err.Error())
+            SendErrorResult(res, ErrNotAuthorized)
+            return
+        }
+    }
+
     err = ctx.Backend.Mv(from, to)
     if err != nil {
         Log.Debug("mv::backend '%s'", err.Error())
         SendErrorResult(res, err)
         return
     }
-
-    go model.SProc.HintRm(&ctx, filepath.Dir(from)+"/")
-    go model.SProc.HintLs(&ctx, filepath.Dir(to)+"/")
 
     SendSuccessResult(res, nil)
 }
@@ -412,13 +436,21 @@ func FileRm(ctx App, res http.ResponseWriter, req *http.Request) {
         SendErrorResult(res, err)
         return
     }
+
+    for _, auth := range Hooks.Get.AuthorisationMiddleware() {
+        if err = auth.Rm(ctx, path); err != nil {
+            Log.Info("rm::auth '%s'", err.Error())
+            SendErrorResult(res, ErrNotAuthorized)
+            return
+        }
+    }
+
     err = ctx.Backend.Rm(path)
     if err != nil {
         Log.Debug("rm::backend '%s'", err.Error())
         SendErrorResult(res, err)
         return
     }
-    model.SProc.HintRm(&ctx, path)
 
     SendSuccessResult(res, nil)
 }
@@ -436,13 +468,20 @@ func FileMkdir(ctx App, res http.ResponseWriter, req *http.Request) {
         return
     }
 
+    for _, auth := range Hooks.Get.AuthorisationMiddleware() {
+        if err = auth.Mkdir(ctx, path); err != nil {
+            Log.Info("mkdir::auth '%s'", err.Error())
+            SendErrorResult(res, ErrNotAuthorized)
+            return
+        }
+    }
+
     err = ctx.Backend.Mkdir(path)
     if err != nil {
         Log.Debug("mkdir::backend '%s'", err.Error())
         SendErrorResult(res, err)
         return
     }
-    go model.SProc.HintLs(&ctx, filepath.Dir(path)+"/")
 
     SendSuccessResult(res, nil)
 }
@@ -460,13 +499,20 @@ func FileTouch(ctx App, res http.ResponseWriter, req *http.Request) {
         return
     }
 
+    for _, auth := range Hooks.Get.AuthorisationMiddleware() {
+        if err = auth.Touch(ctx, path); err != nil {
+            Log.Info("touch::auth '%s'", err.Error())
+            SendErrorResult(res, ErrNotAuthorized)
+            return
+        }
+    }
+
     err = ctx.Backend.Touch(path)
     if err != nil {
         Log.Debug("touch::backend '%s'", err.Error())
         SendErrorResult(res, err)
         return
     }
-    go model.SProc.HintLs(&ctx, filepath.Dir(path)+"/")
 
     SendSuccessResult(res, nil)
 }
@@ -545,6 +591,19 @@ func FileDownloader(ctx App, res http.ResponseWriter, req *http.Request) {
             } else {
                 zipRoot = strings.TrimSuffix(paths[i], filepath.Base(paths[i]))
             }
+
+            for _, auth := range Hooks.Get.AuthorisationMiddleware() {
+                if err = auth.Ls(ctx, paths[i]); err != nil {
+                    Log.Info("downloader::ls::auth path['%s'] => '%s'", paths[i], err.Error())
+                    SendErrorResult(res, ErrNotAuthorized)
+                    return
+                }
+                if err = auth.Cat(ctx, paths[i]); err != nil {
+                    Log.Info("downloader::cat::auth path['%s'] => '%s'", paths[i], err.Error())
+                    SendErrorResult(res, ErrNotAuthorized)
+                    return
+                }
+            }
             addToZipRecursive(ctx, zipWriter, paths[i], zipRoot)
         }
     }
diff --git a/server/ctrl/search.go b/server/ctrl/search.go
index 5aa31d7c..ca0b4559 100644
--- a/server/ctrl/search.go
+++ b/server/ctrl/search.go
@@ -19,19 +19,35 @@ func FileSearch(ctx App, res http.ResponseWriter, req *http.Request) {
         return
     }
 
-    var searchResults []File
-    if Config.Get("features.search.enable").Bool() {
-        searchResults = model.SearchStateful(&ctx, path, q)
-    } else {
-        searchResults = model.SearchStateLess(&ctx, path, q)
+    var searchResults []IFile
+    searchEngine := Hooks.Get.SearchEngine()
+    if searchEngine == nil {
+        SendErrorResult(res, ErrMissingDependency)
+        return
+    }
+    searchResults, err = searchEngine.Query(ctx, path, q)
+    if err != nil {
+        SendErrorResult(res, err)
+        return
     }
 
+    // overwrite the path of a file according to chroot
     if ctx.Session["path"] != "" {
         for i := 0; i < len(searchResults); i++ {
-            searchResults[i].FPath = "/" + strings.TrimPrefix(
-                searchResults[i].FPath,
-                ctx.Session["path"],
-            )
+            searchResults[i] = File{
+                FName: searchResults[i].Name(),
+                FSize: searchResults[i].Size(),
+                FType: func() string {
+                    if searchResults[i].IsDir() {
+                        return "directory"
+                    }
+                    return "file"
+                }(),
+                FPath: "/" + strings.TrimPrefix(
+                    searchResults[i].Path(),
+                    ctx.Session["path"],
+                ),
+            }
         }
     }
     SendSuccessResults(res, searchResults)
diff --git a/server/ctrl/session.go b/server/ctrl/session.go
index 2ef369f7..bfab890a 100644
--- a/server/ctrl/session.go
+++ b/server/ctrl/session.go
@@ -170,7 +170,7 @@ func SessionAuthMiddleware(ctx App, res http.ResponseWriter, req *http.Request)
         if selectedPluginId == "" {
             return nil
         }
-        for key, plugin := range Hooks.All.AuthenticationMiddleware() {
+        for key, plugin := range Hooks.Get.AuthenticationMiddleware() {
             if key == selectedPluginId {
                 return plugin
             }
diff --git a/server/model/formater/README.md b/server/model/formater/README.md
new file mode 100644
index 00000000..c3950371
--- /dev/null
+++ b/server/model/formater/README.md
@@ -0,0 +1,7 @@
+These are bare-bones utilities to convert a stream into text for full text search purposes.
+There are other alternatives, but none of them run with a small footprint.
+
+At the moment it supports:
+- office documents
+- pdf (TODO: remove dependency on pdftotext)
+- text-based files
diff --git a/server/plugin/index.go b/server/plugin/index.go
index f23f16fb..d4443ce0 100644
--- a/server/plugin/index.go
+++ b/server/plugin/index.go
@@ -25,6 +25,7 @@ import (
     _ "github.com/mickael-kerjean/filestash/server/plugin/plg_handler_console"
     _ "github.com/mickael-kerjean/filestash/server/plugin/plg_handler_syncthing"
     _ "github.com/mickael-kerjean/filestash/server/plugin/plg_image_light"
+    _ "github.com/mickael-kerjean/filestash/server/plugin/plg_search_stateless"
    _ "github.com/mickael-kerjean/filestash/server/plugin/plg_security_scanner"
     _ "github.com/mickael-kerjean/filestash/server/plugin/plg_security_svg"
     _ "github.com/mickael-kerjean/filestash/server/plugin/plg_starter_http"
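
With the controller now delegating to Hooks.Get.SearchEngine(), any engine satisfying ISearch can be dropped in. A minimal sketch of a custom engine follows; the package name and its behavior are hypothetical, only the Query signature comes from types.go:

```go
package plg_search_example

import (
	. "github.com/mickael-kerjean/filestash/server/common"
)

type ExampleSearch struct{}

// a stub that matches nothing; a real engine (elasticsearch, solr, ...)
// would run the query and map every hit onto a type satisfying IFile
func (this ExampleSearch) Query(app App, path string, keyword string) ([]IFile, error) {
	return []IFile{}, nil
}

func init() {
	Hooks.Register.SearchEngine(ExampleSearch{})
}
```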
"github.com/mickael-kerjean/filestash/server/common" + "time" +) + +var ( + SEARCH_ENABLE func() bool + SEARCH_PROCESS_MAX func() int + SEARCH_PROCESS_PAR func() int + SEARCH_REINDEX func() int + CYCLE_TIME func() int + INDEXING_EXT func() string + MAX_INDEXING_FSIZE func() int + INDEXING_EXCLUSION = []string{"/node_modules/", "/bower_components/", "/.cache/", "/.npm/", "/.git/"} +) + +func init() { + SEARCH_ENABLE = func() bool { + return Config.Get("features.search.enable").Schema(func(f *FormElement) *FormElement { + if f == nil { + f = &FormElement{} + } + f.Name = "enable" + f.Type = "enable" + f.Target = []string{"process_max", "process_par", "reindex_time", "cycle_time", "max_size", "indexer_ext"} + f.Description = "Enable/Disable full text search" + f.Placeholder = "Default: false" + f.Default = false + return f + }).Bool() + } + SEARCH_ENABLE() + SEARCH_PROCESS_MAX = func() int { + return Config.Get("features.search.process_max").Schema(func(f *FormElement) *FormElement { + if f == nil { + f = &FormElement{} + } + f.Id = "process_max" + f.Name = "process_max" + f.Type = "number" + f.Description = "Size of the pool containing the indexers" + f.Placeholder = "Default: 5" + f.Default = 5 + return f + }).Int() + } + SEARCH_PROCESS_MAX() + SEARCH_PROCESS_PAR = func() int { + return Config.Get("features.search.process_par").Schema(func(f *FormElement) *FormElement { + if f == nil { + f = &FormElement{} + } + f.Id = "process_par" + f.Name = "process_par" + f.Type = "number" + f.Description = "How many concurrent indexers are running in the same time (requires a restart)" + f.Placeholder = "Default: 2" + f.Default = 2 + return f + }).Int() + } + SEARCH_PROCESS_PAR() + SEARCH_REINDEX = func() int { + return Config.Get("features.search.reindex_time").Schema(func(f *FormElement) *FormElement { + if f == nil { + f = &FormElement{} + } + f.Id = "reindex_time" + f.Name = "reindex_time" + f.Type = "number" + f.Description = "Time in hours after which we consider our index to be stale and needs to be reindexed" + f.Placeholder = "Default: 24h" + f.Default = 24 + return f + }).Int() + } + SEARCH_REINDEX() + CYCLE_TIME = func() int { + return Config.Get("features.search.cycle_time").Schema(func(f *FormElement) *FormElement { + if f == nil { + f = &FormElement{} + } + f.Id = "cycle_time" + f.Name = "cycle_time" + f.Type = "number" + f.Description = "Time the indexer needs to spend for each cycle in seconds (discovery, indexing and maintenance)" + f.Placeholder = "Default: 10s" + f.Default = 10 + return f + }).Int() + } + CYCLE_TIME() + MAX_INDEXING_FSIZE = func() int { + return Config.Get("features.search.max_size").Schema(func(f *FormElement) *FormElement { + if f == nil { + f = &FormElement{} + } + f.Id = "max_size" + f.Name = "max_size" + f.Type = "number" + f.Description = "Maximum size of files the indexer will perform full text search" + f.Placeholder = "Default: 524288000 => 512MB" + f.Default = 524288000 + return f + }).Int() + } + MAX_INDEXING_FSIZE() + INDEXING_EXT = func() string { + return Config.Get("features.search.indexer_ext").Schema(func(f *FormElement) *FormElement { + if f == nil { + f = &FormElement{} + } + f.Id = "indexer_ext" + f.Name = "indexer_ext" + f.Type = "string" + f.Description = "File extension we want to see indexed" + f.Placeholder = "Default: org,txt,docx,pdf,md,form" + f.Default = "org,txt,docx,pdf,md,form" + return f + }).String() + } + INDEXING_EXT() + + onChange := Config.ListenForChange() + runner := func() { + startSearch := false + for { + if SEARCH_ENABLE() 
diff --git a/server/plugin/plg_search_sqlitefts/crawlstate.go b/server/plugin/plg_search_sqlitefts/crawlstate.go
new file mode 100644
index 00000000..d14d8087
--- /dev/null
+++ b/server/plugin/plg_search_sqlitefts/crawlstate.go
@@ -0,0 +1,121 @@
+package plg_search_sqlitefts
+
+import (
+    "container/heap"
+    . "github.com/mickael-kerjean/filestash/server/common"
+    "path/filepath"
+    "sync"
+)
+
+var SProc SearchProcess = SearchProcess{
+    idx: make([]SearchIndexer, 0),
+    n:   -1,
+}
+
+type SearchProcess struct {
+    idx []SearchIndexer
+    n   int
+    mu  sync.RWMutex
+}
+
+func (this *SearchProcess) HintLs(app *App, path string) *SearchIndexer {
+    id := GenerateID(app)
+
+    // try to find the search indexer among the existing ones
+    this.mu.RLock()
+    for i := len(this.idx) - 1; i >= 0; i-- {
+        if id == this.idx[i].Id {
+            alreadyHasPath := false
+            for j := 0; j < len(this.idx[i].FoldersUnknown); j++ {
+                if this.idx[i].FoldersUnknown[j].Path == path {
+                    alreadyHasPath = true
+                    break
+                }
+            }
+            if alreadyHasPath == false {
+                heap.Push(&this.idx[i].FoldersUnknown, &Document{
+                    Type:        "directory",
+                    Path:        path,
+                    InitialPath: path,
+                    Name:        filepath.Base(path),
+                })
+            }
+            ret := &this.idx[i]
+            this.mu.RUnlock()
+            return ret
+        }
+    }
+    this.mu.RUnlock()
+
+    // Having all indexers running in memory could be expensive => instead we're cycling a pool
+    search_process_max := SEARCH_PROCESS_MAX()
+    this.mu.Lock()
+    lenIdx := len(this.idx)
+    if lenIdx > 0 && search_process_max > 0 && lenIdx > (search_process_max-1) {
+        toDel := this.idx[0 : lenIdx-(search_process_max-1)]
+        for i := range toDel {
+            toDel[i].DB.Close()
+        }
+        this.idx = this.idx[lenIdx-(search_process_max-1):]
+    }
+    // instantiate the new indexer
+    s := NewSearchIndexer(id, app.Backend)
+    heap.Push(&s.FoldersUnknown, &Document{
+        Type:        "directory",
+        Path:        path,
+        InitialPath: path,
+        Name:        filepath.Base(path),
+    })
+    this.idx = append(this.idx, s)
+    this.mu.Unlock()
+    return &s
+}
+
+func (this *SearchProcess) HintRm(app *App, path string) {
+    id := GenerateID(app)
+    this.mu.RLock()
+    for i := len(this.idx) - 1; i >= 0; i-- {
+        if id == this.idx[i].Id {
+            this.idx[i].DB.Exec("DELETE FROM file WHERE path >= ? AND path < ?", path, path+"~")
+            break
+        }
+    }
+    this.mu.RUnlock()
+}
+
+func (this *SearchProcess) HintFile(app *App, path string) {
+    id := GenerateID(app)
+    this.mu.RLock()
+    for i := len(this.idx) - 1; i >= 0; i-- {
+        if id == this.idx[i].Id {
+            this.idx[i].DB.Exec("UPDATE file set indexTime = NULL WHERE path = ?", path)
+            break
+        }
+    }
+    this.mu.RUnlock()
+}
+
+func (this *SearchProcess) Peek() *SearchIndexer {
+    if len(this.idx) == 0 {
+        return nil
+    }
+    this.mu.Lock()
+    if this.n >= len(this.idx)-1 || this.n < 0 {
+        this.n = 0
+    } else {
+        this.n = this.n + 1
+    }
+    s := &this.idx[this.n]
+    this.mu.Unlock()
+    return s
+}
+
+func (this *SearchProcess) Reset() {
+    this.mu.Lock()
+    for i := range this.idx {
+        this.idx[i].DB.Close()
+    }
+    this.idx = make([]SearchIndexer, 0)
+    this.mu.Unlock()
+    this.n = -1
+}
diff --git a/server/plugin/plg_search_sqlitefts/index.go b/server/plugin/plg_search_sqlitefts/index.go
new file mode 100644
index 00000000..af609d3a
--- /dev/null
+++ b/server/plugin/plg_search_sqlitefts/index.go
@@ -0,0 +1,113 @@
+package plg_search_sqlitefts
+
+import (
+    . "github.com/mickael-kerjean/filestash/server/common"
+    "path/filepath"
+    "regexp"
+    "time"
+)
+
+const (
+    PHASE_EXPLORE  = "PHASE_EXPLORE"
+    PHASE_INDEXING = "PHASE_INDEXING"
+    PHASE_MAINTAIN = "PHASE_MAINTAIN"
+    PHASE_PAUSE    = "PHASE_PAUSE"
+)
+
+func init() {
+    sh := SearchHint{}
+    Hooks.Register.SearchEngine(SqliteSearch{Hint: &sh})
+    Hooks.Register.AuthorisationMiddleware(&sh)
+}
+
+type SqliteSearch struct {
+    Hint *SearchHint
+}
+
+func (this SqliteSearch) Query(app App, path string, keyword string) ([]IFile, error) {
+    files := []IFile{}
+
+    // extract our search indexer
+    s := SProc.HintLs(&app, path)
+    if s == nil {
+        return files, ErrNotReachable
+    }
+
+    if path == "" {
+        path = "/"
+    }
+
+    rows, err := s.DB.Query(
+        "SELECT type, path, size, modTime FROM file WHERE path IN ("+
+            " SELECT path FROM file_index WHERE file_index MATCH ? AND path > ? AND path < ?"+
+            " ORDER BY rank LIMIT 2000"+
+            ")",
+        regexp.MustCompile(`(\.|\-)`).ReplaceAllString(keyword, "\"$1\""),
+        path, path+"~",
+    )
+    if err != nil {
+        Log.Warning("search::query DBQuery (%s)", err.Error())
+        return files, ErrNotReachable
+    }
+    defer rows.Close()
+    for rows.Next() {
+        f := File{}
+        var t string
+        if err = rows.Scan(&f.FType, &f.FPath, &f.FSize, &t); err != nil {
+            Log.Warning("search::query scan (%s)", err.Error())
+            return files, ErrNotReachable
+        }
+        if tm, err := time.Parse(time.RFC3339, t); err == nil {
+            f.FTime = tm.Unix() * 1000
+        }
+        f.FName = filepath.Base(f.FPath)
+        files = append(files, f)
+    }
+    return files, nil
+}
+
+/*
+ * We listen to what the user is doing to hint the crawler about what needs to be updated in
+ * priority, which files got updated and need to be reindexed, what should disappear from the
+ * index, ....
+ * This way we can fine-tune how full text search behaves
+ */
+
+type SearchHint struct{}
+
+func (this SearchHint) Ls(ctx App, path string) error {
+    go SProc.HintLs(&ctx, path)
+    return nil
+}
+
+func (this SearchHint) Cat(ctx App, path string) error {
+    go SProc.HintLs(&ctx, filepath.Dir(path)+"/")
+    return nil
+}
+
+func (this SearchHint) Mkdir(ctx App, path string) error {
+    go SProc.HintLs(&ctx, filepath.Dir(path)+"/")
+    return nil
+}
+
+func (this SearchHint) Rm(ctx App, path string) error {
+    go SProc.HintRm(&ctx, path)
+    return nil
+}
+
+func (this SearchHint) Mv(ctx App, from string, to string) error {
+    go SProc.HintRm(&ctx, filepath.Dir(from)+"/")
+    go SProc.HintLs(&ctx, filepath.Dir(to)+"/")
+    return nil
+}
+
+func (this SearchHint) Save(ctx App, path string) error {
+    go SProc.HintLs(&ctx, filepath.Dir(path)+"/")
+    go SProc.HintFile(&ctx, path)
+    return nil
+}
+
+func (this SearchHint) Touch(ctx App, path string) error {
+    go SProc.HintLs(&ctx, filepath.Dir(path)+"/")
+    return nil
+}
diff --git a/server/model/search.go b/server/plugin/plg_search_sqlitefts/spider.go
similarity index 57%
rename from server/model/search.go
rename to server/plugin/plg_search_sqlitefts/spider.go
index 2068828d..6a7de285 100644
--- a/server/model/search.go
+++ b/server/plugin/plg_search_sqlitefts/spider.go
@@ -1,10 +1,9 @@
-package model
+package plg_search_sqlitefts
 
 import (
     "container/heap"
     "database/sql"
     "encoding/base64"
-    "fmt"
     "github.com/mattn/go-sqlite3"
     . "github.com/mickael-kerjean/filestash/server/common"
     "github.com/mickael-kerjean/filestash/server/model/formater"
@@ -12,337 +11,12 @@ import (
     "io"
     "io/ioutil"
     "os"
     "path/filepath"
-    "regexp"
     "strconv"
     "strings"
     "sync"
     "time"
 )
 
-const (
-    PHASE_EXPLORE  = "PHASE_EXPLORE"
-    PHASE_INDEXING = "PHASE_INDEXING"
-    PHASE_MAINTAIN = "PHASE_MAINTAIN"
-    PHASE_PAUSE    = "PHASE_PAUSE"
-    MAX_HEAP_SIZE  = 100000
-)
-
-var (
-    SEARCH_ENABLE      func() bool
-    SEARCH_TIMEOUT     func() time.Duration
-    SEARCH_PROCESS_MAX func() int
-    SEARCH_PROCESS_PAR func() int
-    SEARCH_REINDEX     func() int
-    CYCLE_TIME         func() int
-    INDEXING_EXT       func() string
-    MAX_INDEXING_FSIZE func() int
-    INDEXING_EXCLUSION = []string{"/node_modules/", "/bower_components/", "/.cache/", "/.npm/", "/.git/"}
-)
-
-var SProc SearchProcess = SearchProcess{
-    idx: make([]SearchIndexer, 0),
-    n:   -1,
-}
-
-func init() {
-    SEARCH_ENABLE = func() bool {
-        return Config.Get("features.search.enable").Schema(func(f *FormElement) *FormElement {
-            if f == nil {
-                f = &FormElement{}
-            }
-            f.Name = "enable"
-            f.Type = "enable"
-            f.Target = []string{"process_max", "process_par", "reindex_time", "cycle_time", "max_size", "indexer_ext"}
-            f.Description = "Enable/Disable full text search"
-            f.Placeholder = "Default: false"
-            f.Default = false
-            return f
-        }).Bool()
-    }
-    SEARCH_ENABLE()
-    SEARCH_TIMEOUT = func() time.Duration {
-        return time.Duration(Config.Get("features.search.explore_timeout").Schema(func(f *FormElement) *FormElement {
-            if f == nil {
-                f = &FormElement{}
-            }
-            f.Name = "explore_timeout"
-            f.Type = "number"
-            f.Default = 300
-            f.Description = `When full text search is disabled, the search engine recursively explore
-       directories to find results. Exploration can't last longer than what is configured here`
-            f.Placeholder = fmt.Sprintf("Default: %dms", f.Default)
-            return f
-        }).Int()) * time.Millisecond
-    }
-    SEARCH_TIMEOUT()
-    SEARCH_PROCESS_MAX = func() int {
-        return Config.Get("features.search.process_max").Schema(func(f *FormElement) *FormElement {
-            if f == nil {
-                f = &FormElement{}
-            }
-            f.Id = "process_max"
-            f.Name = "process_max"
-            f.Type = "number"
-            f.Description = "Size of the pool containing the indexers"
-            f.Placeholder = "Default: 5"
-            f.Default = 5
-            return f
-        }).Int()
-    }
-    SEARCH_PROCESS_MAX()
-    SEARCH_PROCESS_PAR = func() int {
-        return Config.Get("features.search.process_par").Schema(func(f *FormElement) *FormElement {
-            if f == nil {
-                f = &FormElement{}
-            }
-            f.Id = "process_par"
-            f.Name = "process_par"
-            f.Type = "number"
-            f.Description = "How many concurrent indexers are running in the same time (requires a restart)"
-            f.Placeholder = "Default: 2"
-            f.Default = 2
-            return f
-        }).Int()
-    }
-    SEARCH_PROCESS_PAR()
-    SEARCH_REINDEX = func() int {
-        return Config.Get("features.search.reindex_time").Schema(func(f *FormElement) *FormElement {
-            if f == nil {
-                f = &FormElement{}
-            }
-            f.Id = "reindex_time"
-            f.Name = "reindex_time"
-            f.Type = "number"
-            f.Description = "Time in hours after which we consider our index to be stale and needs to be reindexed"
-            f.Placeholder = "Default: 24h"
-            f.Default = 24
-            return f
-        }).Int()
-    }
-    SEARCH_REINDEX()
-    CYCLE_TIME = func() int {
-        return Config.Get("features.search.cycle_time").Schema(func(f *FormElement) *FormElement {
-            if f == nil {
-                f = &FormElement{}
-            }
-            f.Id = "cycle_time"
-            f.Name = "cycle_time"
-            f.Type = "number"
-            f.Description = "Time the indexer needs to spend for each cycle in seconds (discovery, indexing and maintenance)"
-            f.Placeholder = "Default: 10s"
-            f.Default = 10
-            return f
-        }).Int()
-    }
-    CYCLE_TIME()
-    MAX_INDEXING_FSIZE = func() int {
-        return Config.Get("features.search.max_size").Schema(func(f *FormElement) *FormElement {
-            if f == nil {
-                f = &FormElement{}
-            }
-            f.Id = "max_size"
-            f.Name = "max_size"
-            f.Type = "number"
-            f.Description = "Maximum size of files the indexer will perform full text search"
-            f.Placeholder = "Default: 524288000 => 512MB"
-            f.Default = 524288000
-            return f
-        }).Int()
-    }
-    MAX_INDEXING_FSIZE()
-    INDEXING_EXT = func() string {
-        return Config.Get("features.search.indexer_ext").Schema(func(f *FormElement) *FormElement {
-            if f == nil {
-                f = &FormElement{}
-            }
-            f.Id = "indexer_ext"
-            f.Name = "indexer_ext"
-            f.Type = "string"
-            f.Description = "File extension we want to see indexed"
-            f.Placeholder = "Default: org,txt,docx,pdf,md,form"
-            f.Default = "org,txt,docx,pdf,md,form"
-            return f
-        }).String()
-    }
-    INDEXING_EXT()
-
-    onChange := Config.ListenForChange()
-    runner := func() {
-        startSearch := false
-        for {
-            if SEARCH_ENABLE() == false {
-                select {
-                case <-onChange.Listener:
-                    startSearch = SEARCH_ENABLE()
-                }
-                if startSearch == false {
-                    continue
-                }
-            }
-            sidx := SProc.Peek()
-            if sidx == nil {
-                time.Sleep(5 * time.Second)
-                continue
-            }
-            sidx.mu.Lock()
-            sidx.Execute()
-            sidx.mu.Unlock()
-        }
-    }
-    for i := 0; i < SEARCH_PROCESS_PAR(); i++ {
-        go runner()
-    }
-}
-
-func SearchStateful(app *App, path string, keyword string) []File {
-    var files []File = make([]File, 0)
-
-    // extract our search indexer
-    s := SProc.HintLs(app, path)
-    if s == nil {
-        return files
-    }
-
-    if path == "" {
-        path = "/"
-    }
-
-    rows, err := s.DB.Query(
-        "SELECT type, path, size, modTime FROM file WHERE path IN ("+
-            " SELECT path FROM file_index WHERE file_index MATCH ? AND path > ? AND path < ?"+
-            " ORDER BY rank LIMIT 2000"+
-            ")",
-        regexp.MustCompile(`(\.|\-)`).ReplaceAllString(keyword, "\"$1\""),
-        path, path+"~",
-    )
-    if err != nil {
-        return files
-    }
-    defer rows.Close()
-    for rows.Next() {
-        f := File{}
-        var t string
-        if err = rows.Scan(&f.FType, &f.FPath, &f.FSize, &t); err != nil {
-            Log.Warning("search::find search_error (%v)", err)
-            return files
-        }
-        if tm, err := time.Parse(time.RFC3339, t); err == nil {
-            f.FTime = tm.Unix() * 1000
-        }
-        f.FName = filepath.Base(f.FPath)
-        files = append(files, f)
-    }
-    return files
-}
("+ - " SELECT path FROM file_index WHERE file_index MATCH ? AND path > ? AND path < ?"+ - " ORDER BY rank LIMIT 2000"+ - ")", - regexp.MustCompile(`(\.|\-)`).ReplaceAllString(keyword, "\"$1\""), - path, path+"~", - ) - if err != nil { - return files - } - defer rows.Close() - for rows.Next() { - f := File{} - var t string - if err = rows.Scan(&f.FType, &f.FPath, &f.FSize, &t); err != nil { - Log.Warning("search::find search_error (%v)", err) - return files - } - if tm, err := time.Parse(time.RFC3339, t); err == nil { - f.FTime = tm.Unix() * 1000 - } - f.FName = filepath.Base(f.FPath) - files = append(files, f) - } - return files -} - -type SearchProcess struct { - idx []SearchIndexer - n int - mu sync.RWMutex -} - -func (this *SearchProcess) HintLs(app *App, path string) *SearchIndexer { - id := GenerateID(app) - - // try to find the search indexer among the existing ones - this.mu.RLock() - for i := len(this.idx) - 1; i >= 0; i-- { - if id == this.idx[i].Id { - alreadyHasPath := false - for j := 0; j < len(this.idx[i].FoldersUnknown); j++ { - if this.idx[i].FoldersUnknown[j].Path == path { - alreadyHasPath = true - break - } - } - if alreadyHasPath == false { - heap.Push(&this.idx[i].FoldersUnknown, &Document{ - Type: "directory", - Path: path, - InitialPath: path, - Name: filepath.Base(path), - }) - } - ret := &this.idx[i] - this.mu.RUnlock() - return ret - } - } - this.mu.RUnlock() - - // Having all indexers running in memory could be expensive => instead we're cycling a pool - search_process_max := SEARCH_PROCESS_MAX() - this.mu.Lock() - lenIdx := len(this.idx) - if lenIdx > 0 && search_process_max > 0 && lenIdx > (search_process_max-1) { - toDel := this.idx[0 : lenIdx-(search_process_max-1)] - for i := range toDel { - toDel[i].DB.Close() - } - this.idx = this.idx[lenIdx-(search_process_max-1):] - } - // instantiate the new indexer - s := NewSearchIndexer(id, app.Backend) - heap.Push(&s.FoldersUnknown, &Document{ - Type: "directory", - Path: path, - InitialPath: path, - Name: filepath.Base(path), - }) - this.idx = append(this.idx, s) - this.mu.Unlock() - return &s -} - -func (this *SearchProcess) HintRm(app *App, path string) { - id := GenerateID(app) - this.mu.RLock() - for i := len(this.idx) - 1; i >= 0; i-- { - if id == this.idx[i].Id { - this.idx[i].DB.Exec("DELETE FROM file WHERE path >= ? 
-
-func (this *SearchProcess) HintFile(app *App, path string) {
-    id := GenerateID(app)
-    this.mu.RLock()
-    for i := len(this.idx) - 1; i >= 0; i-- {
-        if id == this.idx[i].Id {
-            this.idx[i].DB.Exec("UPDATE file set indexTime = NULL WHERE path = ?", path)
-            break
-        }
-    }
-    this.mu.RUnlock()
-}
-
-func (this *SearchProcess) Peek() *SearchIndexer {
-    if len(this.idx) == 0 {
-        return nil
-    }
-    this.mu.Lock()
-    if this.n >= len(this.idx)-1 || this.n < 0 {
-        this.n = 0
-    } else {
-        this.n = this.n + 1
-    }
-    s := &this.idx[this.n]
-    this.mu.Unlock()
-    return s
-}
-
-func (this *SearchProcess) Reset() {
-    this.mu.Lock()
-    for i := range this.idx {
-        this.idx[i].DB.Close()
-    }
-    this.idx = make([]SearchIndexer, 0)
-    this.mu.Unlock()
-    this.n = -1
-}
-
 type SearchIndexer struct {
     Id             string
     FoldersUnknown HeapDoc
@@ -811,49 +485,3 @@ func (this *SearchIndexer) dbDelete(parent string, f os.FileInfo, tx *sql.Tx) er
     )
     return err
 }
-
-type Document struct {
-    Hash        string    `json:"-"`
-    Type        string    `json:"type"`
-    Name        string    `json:"name"`
-    Path        string    `json:"path"`
-    InitialPath string    `json:"-"`
-    Ext         string    `json:"ext"`
-    ModTime     time.Time `json:"time"`
-    Size        int64     `json:"size"`
-    Content     []byte    `json:"content"`
-    Priority    int       `json:"-"`
-}
-
-// https://golang.org/pkg/container/heap/
-type HeapDoc []*Document
-
-func (h HeapDoc) Len() int { return len(h) }
-func (h HeapDoc) Less(i, j int) bool {
-    if h[i].Priority != 0 || h[j].Priority != 0 {
-        return h[i].Priority < h[j].Priority
-    }
-    scoreA := len(strings.Split(h[i].Path, "/")) / len(strings.Split(h[i].InitialPath, "/"))
-    scoreB := len(strings.Split(h[j].Path, "/")) / len(strings.Split(h[j].InitialPath, "/"))
-    return scoreA < scoreB
-}
-func (h HeapDoc) Swap(i, j int) {
-    a := h[i]
-    h[i] = h[j]
-    h[j] = a
-}
-func (h *HeapDoc) Push(x interface{}) {
-    if h.Len() < MAX_HEAP_SIZE {
-        *h = append(*h, x.(*Document))
-    }
-}
-func (h *HeapDoc) Pop() interface{} {
-    old := *h
-    n := len(old)
-    if n == 0 {
-        return nil
-    }
-    x := old[n-1]
-    *h = old[0 : n-1]
-    return x
-}
diff --git a/server/plugin/plg_search_sqlitefts/utils.go b/server/plugin/plg_search_sqlitefts/utils.go
new file mode 100644
index 00000000..b27b1b31
--- /dev/null
+++ b/server/plugin/plg_search_sqlitefts/utils.go
@@ -0,0 +1,54 @@
+package plg_search_sqlitefts
+
+import (
+    "strings"
+    "time"
+)
+
+const MAX_HEAP_SIZE = 100000
+
+type Document struct {
+    Hash        string    `json:"-"`
+    Type        string    `json:"type"`
+    Name        string    `json:"name"`
+    Path        string    `json:"path"`
+    InitialPath string    `json:"-"`
+    Ext         string    `json:"ext"`
+    ModTime     time.Time `json:"time"`
+    Size        int64     `json:"size"`
+    Content     []byte    `json:"content"`
+    Priority    int       `json:"-"`
+}
+
+// https://golang.org/pkg/container/heap/
+type HeapDoc []*Document
+
+func (h HeapDoc) Len() int { return len(h) }
+func (h HeapDoc) Less(i, j int) bool {
+    if h[i].Priority != 0 || h[j].Priority != 0 {
+        return h[i].Priority < h[j].Priority
+    }
+    scoreA := len(strings.Split(h[i].Path, "/")) / len(strings.Split(h[i].InitialPath, "/"))
+    scoreB := len(strings.Split(h[j].Path, "/")) / len(strings.Split(h[j].InitialPath, "/"))
+    return scoreA < scoreB
+}
+func (h HeapDoc) Swap(i, j int) {
+    a := h[i]
+    h[i] = h[j]
+    h[j] = a
+}
+func (h *HeapDoc) Push(x interface{}) {
+    if h.Len() < MAX_HEAP_SIZE {
+        *h = append(*h, x.(*Document))
+    }
+}
+func (h *HeapDoc) Pop() interface{} {
+    old := *h
+    n := len(old)
+    if n == 0 {
+        return nil
+    }
+    x := old[n-1]
+    *h = old[0 : n-1]
+    return x
+}
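
HeapDoc satisfies the interface expected by the standard container/heap package, so the crawler drives it with heap.Push and heap.Pop: documents with an explicit Priority win, otherwise folders closer to where exploration started come out first. A small usage sketch — the helper name and paths are made up:

```go
package plg_search_sqlitefts

import "container/heap"

// made-up helper: push a discovered folder, then pop the most promising one;
// Less() ranks by Priority first, then by depth relative to InitialPath
func nextFolderToExplore(pending *HeapDoc) *Document {
	heap.Push(pending, &Document{
		Type:        "directory",
		Path:        "/home/documents/",
		InitialPath: "/home/",
	})
	return heap.Pop(pending).(*Document)
}
```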
diff --git a/server/plugin/plg_search_stateless/config.go b/server/plugin/plg_search_stateless/config.go
new file mode 100644
index 00000000..960ed568
--- /dev/null
+++ b/server/plugin/plg_search_stateless/config.go
@@ -0,0 +1,30 @@
+package plg_search_stateless
+
+import (
+    "fmt"
+    . "github.com/mickael-kerjean/filestash/server/common"
+    "time"
+)
+
+var (
+    SEARCH_TIMEOUT func() time.Duration
+)
+
+func init() {
+    SEARCH_TIMEOUT = func() time.Duration {
+        return time.Duration(Config.Get("features.search.explore_timeout").Schema(func(f *FormElement) *FormElement {
+            if f == nil {
+                f = &FormElement{}
+            }
+            f.Name = "explore_timeout"
+            f.Type = "number"
+            f.Default = 300
+            f.Description = `When full text search is disabled, the search engine recursively explores
+       directories to find results. Exploration can't last longer than what is configured here`
+            f.Placeholder = fmt.Sprintf("Default: %dms", f.Default)
+            return f
+        }).Int()) * time.Millisecond
+    }
+    SEARCH_TIMEOUT()
+
+}
diff --git a/server/model/search_stateless.go b/server/plugin/plg_search_stateless/index.go
similarity index 58%
rename from server/model/search_stateless.go
rename to server/plugin/plg_search_stateless/index.go
index 89e32ccc..64e87d4c 100644
--- a/server/model/search_stateless.go
+++ b/server/plugin/plg_search_stateless/index.go
@@ -1,78 +1,30 @@
-package model
+package plg_search_stateless
 
 import (
     . "github.com/mickael-kerjean/filestash/server/common"
-    "os"
-    "path/filepath"
     "strings"
     "time"
 )
 
+func init() {
+    Hooks.Register.SearchEngine(StatelessSearch{})
+}
+
 type PathQuandidate struct {
     Path  string
     Score int
 }
 
-func scoreBoostForPath(p string) int {
-    b := strings.ToLower(filepath.Base(p))
+type StatelessSearch struct{}
 
-    // some path are garbage we don't want to explore unless there's nothing else to do
-    if b == "node_modules" {
-        return -100
-    } else if strings.HasPrefix(b, ".") {
-        return -10
-    }
-
-    // not all path are equally interesting, we bump the score of what we thing is interesting
-    score := 0
-    if strings.Contains(b, "document") {
-        score += 3
-    } else if strings.Contains(b, "project") {
-        score += 3
-    } else if strings.Contains(b, "home") {
-        score += 3
-    } else if strings.Contains(b, "note") {
-        score += 3
-    }
-    return score
-}
-
-func scoreBoostForFilesInDirectory(f []os.FileInfo) int {
-    s := 0
-    for i := 0; i < len(f); i++ {
-        name := f[i].Name()
-        if f[i].IsDir() == false {
-            if strings.HasSuffix(name, ".org") {
-                s += 2
-            } else if strings.HasSuffix(name, ".pdf") {
-                s += 1
-            } else if strings.HasSuffix(name, ".doc") || strings.HasSuffix(name, ".docx") {
-                s += 1
-            } else if strings.HasSuffix(name, ".md") {
-                s += 1
-            } else if strings.HasSuffix(name, ".pdf") {
-                s += 1
-            }
-        }
-        if s > 4 {
-            return 4
-        }
-    }
-    return s
-}
-
-func scoreBoostOnDepth(p string) int {
-    return -strings.Count(p, "/")
-}
-
-func SearchStateLess(app *App, path string, keyword string) []File {
-    files := make([]File, 0)
+func (this StatelessSearch) Query(app App, path string, keyword string) ([]IFile, error) {
+    files := make([]IFile, 0)
     toVisit := []PathQuandidate{PathQuandidate{path, 0}}
     MAX_SEARCH_TIME := SEARCH_TIMEOUT()
     for start := time.Now(); time.Since(start) < MAX_SEARCH_TIME; {
         if len(toVisit) == 0 {
-            return files
+            return files, nil
         }
         currentPath := toVisit[0]
         if len(toVisit) == 0 {
@@ -140,5 +92,5 @@ func SearchStateLess(app *App, path string, keyword string) []File {
         }
     }
-    return files
+    return files, nil
 }
diff --git a/server/plugin/plg_search_stateless/scoring.go b/server/plugin/plg_search_stateless/scoring.go
new file mode 100644
index 00000000..25c5195d
--- /dev/null
+++ b/server/plugin/plg_search_stateless/scoring.go
@@ -0,0 +1,59 @@
+package plg_search_stateless
+
+import (
+    "os"
+    "path/filepath"
+    "strings"
+)
+
+func scoreBoostForPath(p string) int {
+    b := strings.ToLower(filepath.Base(p))
+
+    // some paths are garbage we don't want to explore unless there's nothing else to do
+    if b == "node_modules" {
+        return -100
+    } else if strings.HasPrefix(b, ".") {
+        return -10
+    }
+
+    // not all paths are equally interesting, we bump the score of what we think is interesting
+    score := 0
+    if strings.Contains(b, "document") {
+        score += 3
+    } else if strings.Contains(b, "project") {
+        score += 3
+    } else if strings.Contains(b, "home") {
+        score += 3
+    } else if strings.Contains(b, "note") {
+        score += 3
+    }
+    return score
+}
+
+func scoreBoostForFilesInDirectory(f []os.FileInfo) int {
+    s := 0
+    for i := 0; i < len(f); i++ {
+        name := f[i].Name()
+        if f[i].IsDir() == false {
+            if strings.HasSuffix(name, ".org") {
+                s += 2
+            } else if strings.HasSuffix(name, ".pdf") {
+                s += 1
+            } else if strings.HasSuffix(name, ".doc") || strings.HasSuffix(name, ".docx") {
+                s += 1
+            } else if strings.HasSuffix(name, ".md") {
+                s += 1
+            }
+        }
+        if s > 4 {
+            return 4
+        }
+    }
+    return s
+}
+
+func scoreBoostOnDepth(p string) int {
+    return -strings.Count(p, "/")
+}
diff --git a/server/plugin/plg_security_killswitch/index.go b/server/plugin/plg_security_killswitch/index.go
deleted file mode 100644
index c76d634c..00000000
--- a/server/plugin/plg_security_killswitch/index.go
+++ /dev/null
@@ -1,68 +0,0 @@
-package plg_security_killswitch
-
-/*
- * This package was made after the log4j CVE to have a way to remotly kill an instance if something
- * terrible were to happen.
- */
-
-import (
-    "encoding/json"
-    "fmt"
-    . "github.com/mickael-kerjean/filestash/server/common"
-    "net/http"
-    "os"
-    "time"
-)
-
-func init() {
-    Log.Debug("Killswitch enabled")
-    main()
-    go func() {
-        for range time.Tick(time.Second * 1800) { // every 60 minutes
-            main()
-        }
-    }()
-}
-
-func main() {
-    req, err := http.NewRequest(
-        "GET",
-        fmt.Sprintf(
-            "https://downloads.filestash.app/api/killswitch.php?version=%s&host=%s",
-            APP_VERSION+"."+BUILD_DATE,
-            Config.Get("general.host").String(),
-        ),
-        nil,
-    )
-    if err != nil {
-        return
-    }
-    res, err := HTTPClient.Do(req)
-    if err != nil {
-        return
-    }
-    defer res.Body.Close()
-    if res.StatusCode != http.StatusOK {
-        return
-    }
-    d := struct {
-        Status  string `json:"status"`
-        Action  string `json:"action"`
-        Message string `json:"message"`
-    }{}
-    if err = json.NewDecoder(res.Body).Decode(&d); err != nil {
-        return
-    }
-    if d.Status != "ok" {
-        return
-    }
-    switch d.Action {
-    case "EXIT":
-        Log.Warning("REMOTE KILLSWITCH ENGAGED - %s", d.Message)
-        os.Exit(1)
-    default:
-        if d.Message != "" {
-            Log.Info("REMOTE MESSAGE - %s", d.Message)
-        }
-    }
-}