feature (search): search as a plugin

This commit is contained in:
Mickael Kerjean 2022-01-13 03:07:25 +11:00
parent cad6fe3420
commit a91df1637d
17 changed files with 729 additions and 522 deletions

View file

@ -22,9 +22,14 @@ var Hooks = struct {
}{
Get: Get{},
Register: Register{},
All: All{},
}
/*
* ProcessFileContentBeforeSend is a processing hook used in plugins like:
* 1. pluggable image transcoding service: plg_image_light, plg_image_bimg, plg_image_golang
* 2. video transcoding service: plg_video_transcode
* 3. disallow certain type of file: plg_security_svg
*/
var process_file_content_before_send []func(io.ReadCloser, *App, *http.ResponseWriter, *http.Request) (io.ReadCloser, error)
func (this Register) ProcessFileContentBeforeSend(fn func(io.ReadCloser, *App, *http.ResponseWriter, *http.Request) (io.ReadCloser, error)) {
@ -34,6 +39,14 @@ func (this Get) ProcessFileContentBeforeSend() []func(io.ReadCloser, *App, *http
return process_file_content_before_send
}
/*
* HttpEndpoint is a hook that makes it possible to register new endpoint in the application.
* It is used in plugin like:
* 1. plg_video_transcoder to serve the transcoded video segment via hls
* 2. plg_editor_onlyoffice to serve the content for a custom type in an iframe
* 3. plg_handler_syncthing to create better integration with syncthing
* 4. plg_handler_console to serve a full-blown console for debugging the application
*/
var http_endpoint []func(*mux.Router, *App) error
func (this Register) HttpEndpoint(fn func(*mux.Router, *App) error) {
@ -43,6 +56,14 @@ func (this Get) HttpEndpoint() []func(*mux.Router, *App) error {
return http_endpoint
}
/*
* Starter is the meat that lets us connect to a wide variety of servers like:
* - plg_starter_http which is the default that serves the application on port 8334
* - plg_starter_tor to serve the application via tor
* - plg_starter_web that create ssl certificate via letsencrypt
* - plg_started_http2 to create an HTTP2 server
* - ...
*/
var starter_process []func(*mux.Router)
func (this Register) Starter(fn func(*mux.Router)) {
@ -52,16 +73,55 @@ func (this Get) Starter() []func(*mux.Router) {
return starter_process
}
/*
* AuthenticationMiddleware is what enabled us to authenticate user via different means:
* - plg_authentication_admin to enable connection to an admin
* - plg_authentication_saml
* - plg_authentication_openid
* - plg_authentication_ldap
* - ...
*/
var authentication_middleware map[string]IAuth = make(map[string]IAuth, 0)
func (this Register) AuthenticationMiddleware(id string, am IAuth) {
authentication_middleware[id] = am
}
func (this All) AuthenticationMiddleware() map[string]IAuth {
func (this Get) AuthenticationMiddleware() map[string]IAuth {
return authentication_middleware
}
/*
* AuthorisationMiddleware is to enable custom rule for authorisation. eg: anonymous can see, registered
* user can see/edit some files but not some others, admin can do everything
*/
var authorisation_middleware []IAuthorisation
func (this Register) AuthorisationMiddleware(a IAuthorisation) {
authorisation_middleware = append(authorisation_middleware, a)
}
func (this Get) AuthorisationMiddleware() []IAuthorisation {
return authorisation_middleware
}
/*
* Search is the pluggable search mechanism. By default, there are 2 options:
* - plg_search_stateless which does stateless search based on filename only
* - plg_search_statefull which does full text search with a sqlite data store
* The idea here is to enable different type of usage like leveraging elastic search or solr
* with custom stuff around it
*/
var search ISearch
func (this Register) SearchEngine(s ISearch) {
search = s
}
func (this Get) SearchEngine() ISearch {
return search
}
/*
* UI Overrides
* They are the means by which server plugin change the frontend behaviors.

View file

@ -26,6 +26,25 @@ type IAuth interface {
Callback(formData map[string]string, idpParams map[string]string, res http.ResponseWriter) (map[string]string, error)
}
type IAuthorisation interface {
Ls(ctx App, path string) error
Cat(ctx App, path string) error
Mkdir(ctx App, path string) error
Rm(ctx App, path string) error
Mv(ctx App, from string, to string) error
Save(ctx App, path string) error
Touch(ctx App, path string) error
}
type IFile interface {
os.FileInfo
Path() string
}
type ISearch interface {
Query(ctx App, basePath string, term string) ([]IFile, error)
}
type File struct {
FName string `json:"name"`
FType string `json:"type"`
@ -65,6 +84,10 @@ func (f File) Sys() interface{} {
return nil
}
func (f File) Path() string {
return f.FPath
}
type Metadata struct {
CanSee *bool `json:"can_read,omitempty"`
CanCreateFile *bool `json:"can_create_file,omitempty"`

View file

@ -86,7 +86,7 @@ func AdminBackend(ctx App, res http.ResponseWriter, req *http.Request) {
}
func AdminAuthenticationMiddleware(ctx App, res http.ResponseWriter, req *http.Request) {
drivers := Hooks.All.AuthenticationMiddleware()
drivers := Hooks.Get.AuthenticationMiddleware()
middlewares := make(map[string]Form, len(drivers))
for id, driver := range drivers {
middlewares[id] = driver.Setup()

View file

@ -64,6 +64,13 @@ func FileLs(ctx App, res http.ResponseWriter, req *http.Request) {
SendErrorResult(res, err)
return
}
for _, auth := range Hooks.Get.AuthorisationMiddleware() {
if err = auth.Ls(ctx, path); err != nil {
Log.Info("ls::auth '%s'", err.Error())
SendErrorResult(res, ErrNotAuthorized)
return
}
}
entries, err := ctx.Backend.Ls(path)
if err != nil {
@ -71,7 +78,6 @@ func FileLs(ctx App, res http.ResponseWriter, req *http.Request) {
SendErrorResult(res, err)
return
}
go model.SProc.HintLs(&ctx, path)
files := make([]FileInfo, len(entries))
etagger := fnv.New32()
@ -148,6 +154,14 @@ func FileCat(ctx App, res http.ResponseWriter, req *http.Request) {
return
}
for _, auth := range Hooks.Get.AuthorisationMiddleware() {
if err = auth.Cat(ctx, path); err != nil {
Log.Info("cat::auth '%s'", err.Error())
SendErrorResult(res, ErrNotAuthorized)
return
}
}
var file io.ReadCloser
var contentLength int64 = -1
var needToCreateCache bool = false
@ -177,7 +191,6 @@ func FileCat(ctx App, res http.ResponseWriter, req *http.Request) {
if req.Header.Get("range") != "" {
needToCreateCache = true
}
go model.SProc.HintLs(&ctx, filepath.Dir(path)+"/")
}
// plugin hooks
@ -350,6 +363,14 @@ func FileSave(ctx App, res http.ResponseWriter, req *http.Request) {
}
}
for _, auth := range Hooks.Get.AuthorisationMiddleware() {
if err = auth.Save(ctx, path); err != nil {
Log.Info("save::auth '%s'", err.Error())
SendErrorResult(res, ErrNotAuthorized)
return
}
}
err = ctx.Backend.Save(path, req.Body)
req.Body.Close()
if err != nil {
@ -357,8 +378,6 @@ func FileSave(ctx App, res http.ResponseWriter, req *http.Request) {
SendErrorResult(res, NewError(err.Error(), 403))
return
}
go model.SProc.HintLs(&ctx, filepath.Dir(path)+"/")
go model.SProc.HintFile(&ctx, path)
SendSuccessResult(res, nil)
}
@ -387,15 +406,20 @@ func FileMv(ctx App, res http.ResponseWriter, req *http.Request) {
return
}
for _, auth := range Hooks.Get.AuthorisationMiddleware() {
if err = auth.Mv(ctx, from, to); err != nil {
Log.Info("mv::auth '%s'", err.Error())
SendErrorResult(res, ErrNotAuthorized)
return
}
}
err = ctx.Backend.Mv(from, to)
if err != nil {
Log.Debug("mv::backend '%s'", err.Error())
SendErrorResult(res, err)
return
}
go model.SProc.HintRm(&ctx, filepath.Dir(from)+"/")
go model.SProc.HintLs(&ctx, filepath.Dir(to)+"/")
SendSuccessResult(res, nil)
}
@ -412,13 +436,21 @@ func FileRm(ctx App, res http.ResponseWriter, req *http.Request) {
SendErrorResult(res, err)
return
}
for _, auth := range Hooks.Get.AuthorisationMiddleware() {
if err = auth.Rm(ctx, path); err != nil {
Log.Info("rm::auth '%s'", err.Error())
SendErrorResult(res, ErrNotAuthorized)
return
}
}
err = ctx.Backend.Rm(path)
if err != nil {
Log.Debug("rm::backend '%s'", err.Error())
SendErrorResult(res, err)
return
}
model.SProc.HintRm(&ctx, path)
SendSuccessResult(res, nil)
}
@ -436,13 +468,20 @@ func FileMkdir(ctx App, res http.ResponseWriter, req *http.Request) {
return
}
for _, auth := range Hooks.Get.AuthorisationMiddleware() {
if err = auth.Mkdir(ctx, path); err != nil {
Log.Info("mkdir::auth '%s'", err.Error())
SendErrorResult(res, ErrNotAuthorized)
return
}
}
err = ctx.Backend.Mkdir(path)
if err != nil {
Log.Debug("mkdir::backend '%s'", err.Error())
SendErrorResult(res, err)
return
}
go model.SProc.HintLs(&ctx, filepath.Dir(path)+"/")
SendSuccessResult(res, nil)
}
@ -460,13 +499,20 @@ func FileTouch(ctx App, res http.ResponseWriter, req *http.Request) {
return
}
for _, auth := range Hooks.Get.AuthorisationMiddleware() {
if err = auth.Touch(ctx, path); err != nil {
Log.Info("touch::auth '%s'", err.Error())
SendErrorResult(res, ErrNotAuthorized)
return
}
}
err = ctx.Backend.Touch(path)
if err != nil {
Log.Debug("touch::backend '%s'", err.Error())
SendErrorResult(res, err)
return
}
go model.SProc.HintLs(&ctx, filepath.Dir(path)+"/")
SendSuccessResult(res, nil)
}
@ -545,6 +591,19 @@ func FileDownloader(ctx App, res http.ResponseWriter, req *http.Request) {
} else {
zipRoot = strings.TrimSuffix(paths[i], filepath.Base(paths[i]))
}
for _, auth := range Hooks.Get.AuthorisationMiddleware() {
if err = auth.Ls(ctx, paths[i]); err != nil {
Log.Info("downloader::ls::auth path['%s'] => '%s'", paths[i], err.Error())
SendErrorResult(res, ErrNotAuthorized)
return
}
if err = auth.Cat(ctx, paths[i]); err != nil {
Log.Info("downloader::cat::auth path['%s'] => '%s'", paths[i], err.Error())
SendErrorResult(res, ErrNotAuthorized)
return
}
}
addToZipRecursive(ctx, zipWriter, paths[i], zipRoot)
}
}

View file

@ -19,19 +19,35 @@ func FileSearch(ctx App, res http.ResponseWriter, req *http.Request) {
return
}
var searchResults []File
if Config.Get("features.search.enable").Bool() {
searchResults = model.SearchStateful(&ctx, path, q)
} else {
searchResults = model.SearchStateLess(&ctx, path, q)
var searchResults []IFile
searchEngine := Hooks.Get.SearchEngine()
if searchEngine == nil {
SendErrorResult(res, ErrMissingDependency)
return
}
searchResults, err = searchEngine.Query(ctx, path, q)
if err != nil {
SendErrorResult(res, err)
return
}
// overwrite the path of a file according to chroot
if ctx.Session["path"] != "" {
for i := 0; i < len(searchResults); i++ {
searchResults[i].FPath = "/" + strings.TrimPrefix(
searchResults[i].FPath,
ctx.Session["path"],
)
searchResults[i] = File{
FName: searchResults[i].Name(),
FSize: searchResults[i].Size(),
FType: func() string {
if searchResults[i].IsDir() {
return "directory"
}
return "file"
}(),
FPath: "/" + strings.TrimPrefix(
searchResults[i].Path(),
ctx.Session["path"],
),
}
}
}
SendSuccessResults(res, searchResults)

View file

@ -170,7 +170,7 @@ func SessionAuthMiddleware(ctx App, res http.ResponseWriter, req *http.Request)
if selectedPluginId == "" {
return nil
}
for key, plugin := range Hooks.All.AuthenticationMiddleware() {
for key, plugin := range Hooks.Get.AuthenticationMiddleware() {
if key == selectedPluginId {
return plugin
}

View file

@ -0,0 +1,7 @@
These are bare-bones utilities to convert a stream into text for full text search purposes.
There are other alternatives, but none of them runs with a small footprint.
At the moment it supports:
- office documents
- pdf (TODO: remove dependency on pdftotext)
- text-based files

View file

@ -25,6 +25,7 @@ import (
_ "github.com/mickael-kerjean/filestash/server/plugin/plg_handler_console"
_ "github.com/mickael-kerjean/filestash/server/plugin/plg_handler_syncthing"
_ "github.com/mickael-kerjean/filestash/server/plugin/plg_image_light"
_ "github.com/mickael-kerjean/filestash/server/plugin/plg_search_stateless"
_ "github.com/mickael-kerjean/filestash/server/plugin/plg_security_scanner"
_ "github.com/mickael-kerjean/filestash/server/plugin/plg_security_svg"
_ "github.com/mickael-kerjean/filestash/server/plugin/plg_starter_http"

View file

@ -0,0 +1,152 @@
package plg_search_sqlitefts
import (
. "github.com/mickael-kerjean/filestash/server/common"
"time"
)
var (
SEARCH_ENABLE func() bool
SEARCH_PROCESS_MAX func() int
SEARCH_PROCESS_PAR func() int
SEARCH_REINDEX func() int
CYCLE_TIME func() int
INDEXING_EXT func() string
MAX_INDEXING_FSIZE func() int
INDEXING_EXCLUSION = []string{"/node_modules/", "/bower_components/", "/.cache/", "/.npm/", "/.git/"}
)
func init() {
SEARCH_ENABLE = func() bool {
return Config.Get("features.search.enable").Schema(func(f *FormElement) *FormElement {
if f == nil {
f = &FormElement{}
}
f.Name = "enable"
f.Type = "enable"
f.Target = []string{"process_max", "process_par", "reindex_time", "cycle_time", "max_size", "indexer_ext"}
f.Description = "Enable/Disable full text search"
f.Placeholder = "Default: false"
f.Default = false
return f
}).Bool()
}
SEARCH_ENABLE()
SEARCH_PROCESS_MAX = func() int {
return Config.Get("features.search.process_max").Schema(func(f *FormElement) *FormElement {
if f == nil {
f = &FormElement{}
}
f.Id = "process_max"
f.Name = "process_max"
f.Type = "number"
f.Description = "Size of the pool containing the indexers"
f.Placeholder = "Default: 5"
f.Default = 5
return f
}).Int()
}
SEARCH_PROCESS_MAX()
SEARCH_PROCESS_PAR = func() int {
return Config.Get("features.search.process_par").Schema(func(f *FormElement) *FormElement {
if f == nil {
f = &FormElement{}
}
f.Id = "process_par"
f.Name = "process_par"
f.Type = "number"
f.Description = "How many concurrent indexers are running in the same time (requires a restart)"
f.Placeholder = "Default: 2"
f.Default = 2
return f
}).Int()
}
SEARCH_PROCESS_PAR()
SEARCH_REINDEX = func() int {
return Config.Get("features.search.reindex_time").Schema(func(f *FormElement) *FormElement {
if f == nil {
f = &FormElement{}
}
f.Id = "reindex_time"
f.Name = "reindex_time"
f.Type = "number"
f.Description = "Time in hours after which we consider our index to be stale and needs to be reindexed"
f.Placeholder = "Default: 24h"
f.Default = 24
return f
}).Int()
}
SEARCH_REINDEX()
CYCLE_TIME = func() int {
return Config.Get("features.search.cycle_time").Schema(func(f *FormElement) *FormElement {
if f == nil {
f = &FormElement{}
}
f.Id = "cycle_time"
f.Name = "cycle_time"
f.Type = "number"
f.Description = "Time the indexer needs to spend for each cycle in seconds (discovery, indexing and maintenance)"
f.Placeholder = "Default: 10s"
f.Default = 10
return f
}).Int()
}
CYCLE_TIME()
MAX_INDEXING_FSIZE = func() int {
return Config.Get("features.search.max_size").Schema(func(f *FormElement) *FormElement {
if f == nil {
f = &FormElement{}
}
f.Id = "max_size"
f.Name = "max_size"
f.Type = "number"
f.Description = "Maximum size of files the indexer will perform full text search"
f.Placeholder = "Default: 524288000 => 512MB"
f.Default = 524288000
return f
}).Int()
}
MAX_INDEXING_FSIZE()
INDEXING_EXT = func() string {
return Config.Get("features.search.indexer_ext").Schema(func(f *FormElement) *FormElement {
if f == nil {
f = &FormElement{}
}
f.Id = "indexer_ext"
f.Name = "indexer_ext"
f.Type = "string"
f.Description = "File extension we want to see indexed"
f.Placeholder = "Default: org,txt,docx,pdf,md,form"
f.Default = "org,txt,docx,pdf,md,form"
return f
}).String()
}
INDEXING_EXT()
onChange := Config.ListenForChange()
runner := func() {
startSearch := false
for {
if SEARCH_ENABLE() == false {
select {
case <-onChange.Listener:
startSearch = SEARCH_ENABLE()
}
if startSearch == false {
continue
}
}
sidx := SProc.Peek()
if sidx == nil {
time.Sleep(5 * time.Second)
continue
}
sidx.mu.Lock()
sidx.Execute()
sidx.mu.Unlock()
}
}
for i := 0; i < SEARCH_PROCESS_PAR(); i++ {
go runner()
}
}

View file

@ -0,0 +1,121 @@
package plg_search_sqlitefts
import (
"container/heap"
. "github.com/mickael-kerjean/filestash/server/common"
"path/filepath"
"sync"
)
var SProc SearchProcess = SearchProcess{
idx: make([]SearchIndexer, 0),
n: -1,
}
type SearchProcess struct {
idx []SearchIndexer
n int
mu sync.RWMutex
}
func (this *SearchProcess) HintLs(app *App, path string) *SearchIndexer {
id := GenerateID(app)
// try to find the search indexer among the existing ones
this.mu.RLock()
for i := len(this.idx) - 1; i >= 0; i-- {
if id == this.idx[i].Id {
alreadyHasPath := false
for j := 0; j < len(this.idx[i].FoldersUnknown); j++ {
if this.idx[i].FoldersUnknown[j].Path == path {
alreadyHasPath = true
break
}
}
if alreadyHasPath == false {
heap.Push(&this.idx[i].FoldersUnknown, &Document{
Type: "directory",
Path: path,
InitialPath: path,
Name: filepath.Base(path),
})
}
ret := &this.idx[i]
this.mu.RUnlock()
return ret
}
}
this.mu.RUnlock()
// Having all indexers running in memory could be expensive => instead we're cycling a pool
search_process_max := SEARCH_PROCESS_MAX()
this.mu.Lock()
lenIdx := len(this.idx)
if lenIdx > 0 && search_process_max > 0 && lenIdx > (search_process_max-1) {
toDel := this.idx[0 : lenIdx-(search_process_max-1)]
for i := range toDel {
toDel[i].DB.Close()
}
this.idx = this.idx[lenIdx-(search_process_max-1):]
}
// instantiate the new indexer
s := NewSearchIndexer(id, app.Backend)
heap.Push(&s.FoldersUnknown, &Document{
Type: "directory",
Path: path,
InitialPath: path,
Name: filepath.Base(path),
})
this.idx = append(this.idx, s)
this.mu.Unlock()
return &s
}
func (this *SearchProcess) HintRm(app *App, path string) {
id := GenerateID(app)
this.mu.RLock()
for i := len(this.idx) - 1; i >= 0; i-- {
if id == this.idx[i].Id {
this.idx[i].DB.Exec("DELETE FROM file WHERE path >= ? AND path < ?", path, path+"~")
break
}
}
this.mu.RUnlock()
}
func (this *SearchProcess) HintFile(app *App, path string) {
id := GenerateID(app)
this.mu.RLock()
for i := len(this.idx) - 1; i >= 0; i-- {
if id == this.idx[i].Id {
this.idx[i].DB.Exec("UPDATE file set indexTime = NULL WHERE path = ?", path)
break
}
}
this.mu.RUnlock()
}
func (this *SearchProcess) Peek() *SearchIndexer {
if len(this.idx) == 0 {
return nil
}
this.mu.Lock()
if this.n >= len(this.idx)-1 || this.n < 0 {
this.n = 0
} else {
this.n = this.n + 1
}
s := &this.idx[this.n]
this.mu.Unlock()
return s
}
func (this *SearchProcess) Reset() {
this.mu.Lock()
for i := range this.idx {
this.idx[i].DB.Close()
}
this.idx = make([]SearchIndexer, 0)
this.mu.Unlock()
this.n = -1
}

View file

@ -0,0 +1,113 @@
package plg_search_sqlitefts
import (
. "github.com/mickael-kerjean/filestash/server/common"
"path/filepath"
"regexp"
"time"
)
const (
PHASE_EXPLORE = "PHASE_EXPLORE"
PHASE_INDEXING = "PHASE_INDEXING"
PHASE_MAINTAIN = "PHASE_MAINTAIN"
PHASE_PAUSE = "PHASE_PAUSE"
)
func init() {
sh := SearchHint{}
Hooks.Register.SearchEngine(SqliteSearch{Hint: &sh})
Hooks.Register.AuthorisationMiddleware(&sh)
}
type SqliteSearch struct {
Hint *SearchHint
}
func (this SqliteSearch) Query(app App, path string, keyword string) ([]IFile, error) {
files := []IFile{}
// extract our search indexer
s := SProc.HintLs(&app, path)
if s == nil {
return files, ErrNotReachable
}
if path == "" {
path = "/"
}
rows, err := s.DB.Query(
"SELECT type, path, size, modTime FROM file WHERE path IN ("+
" SELECT path FROM file_index WHERE file_index MATCH ? AND path > ? AND path < ?"+
" ORDER BY rank LIMIT 2000"+
")",
regexp.MustCompile(`(\.|\-)`).ReplaceAllString(keyword, "\"$1\""),
path, path+"~",
)
if err != nil {
Log.Warning("search::query DBQuery (%s)", err.Error())
return files, ErrNotReachable
}
defer rows.Close()
for rows.Next() {
f := File{}
var t string
if err = rows.Scan(&f.FType, &f.FPath, &f.FSize, &t); err != nil {
Log.Warning("search::query scan (%s)", err.Error())
return files, ErrNotReachable
}
if tm, err := time.Parse(time.RFC3339, t); err == nil {
f.FTime = tm.Unix() * 1000
}
f.FName = filepath.Base(f.FPath)
files = append(files, f)
}
return files, nil
}
/*
* We're listening to what the user is doing to hint the crawler over
* what needs to be updated in priority, what file got updated and would need
* to be reindexed, what should disappear from the index, ....
* This way we can fine tune how full text search is behaving
*/
type SearchHint struct{}
func (this SearchHint) Ls(ctx App, path string) error {
go SProc.HintLs(&ctx, path)
return nil
}
func (this SearchHint) Cat(ctx App, path string) error {
go SProc.HintLs(&ctx, filepath.Dir(path)+"/")
return nil
}
func (this SearchHint) Mkdir(ctx App, path string) error {
go SProc.HintLs(&ctx, filepath.Dir(path)+"/")
return nil
}
func (this SearchHint) Rm(ctx App, path string) error {
go SProc.HintRm(&ctx, path)
return nil
}
func (this SearchHint) Mv(ctx App, from string, to string) error {
go SProc.HintRm(&ctx, filepath.Dir(from)+"/")
go SProc.HintLs(&ctx, filepath.Dir(to)+"/")
return nil
}
func (this SearchHint) Save(ctx App, path string) error {
go SProc.HintLs(&ctx, filepath.Dir(path)+"/")
go SProc.HintFile(&ctx, path)
return nil
}
func (this SearchHint) Touch(ctx App, path string) error {
go SProc.HintLs(&ctx, filepath.Dir(path)+"/")
return nil
}

View file

@ -1,10 +1,9 @@
package model
package plg_search_sqlitefts
import (
"container/heap"
"database/sql"
"encoding/base64"
"fmt"
"github.com/mattn/go-sqlite3"
. "github.com/mickael-kerjean/filestash/server/common"
"github.com/mickael-kerjean/filestash/server/model/formater"
@ -12,337 +11,12 @@ import (
"io/ioutil"
"os"
"path/filepath"
"regexp"
"strconv"
"strings"
"sync"
"time"
)
const (
PHASE_EXPLORE = "PHASE_EXPLORE"
PHASE_INDEXING = "PHASE_INDEXING"
PHASE_MAINTAIN = "PHASE_MAINTAIN"
PHASE_PAUSE = "PHASE_PAUSE"
MAX_HEAP_SIZE = 100000
)
var (
SEARCH_ENABLE func() bool
SEARCH_TIMEOUT func() time.Duration
SEARCH_PROCESS_MAX func() int
SEARCH_PROCESS_PAR func() int
SEARCH_REINDEX func() int
CYCLE_TIME func() int
INDEXING_EXT func() string
MAX_INDEXING_FSIZE func() int
INDEXING_EXCLUSION = []string{"/node_modules/", "/bower_components/", "/.cache/", "/.npm/", "/.git/"}
)
var SProc SearchProcess = SearchProcess{
idx: make([]SearchIndexer, 0),
n: -1,
}
func init() {
SEARCH_ENABLE = func() bool {
return Config.Get("features.search.enable").Schema(func(f *FormElement) *FormElement {
if f == nil {
f = &FormElement{}
}
f.Name = "enable"
f.Type = "enable"
f.Target = []string{"process_max", "process_par", "reindex_time", "cycle_time", "max_size", "indexer_ext"}
f.Description = "Enable/Disable full text search"
f.Placeholder = "Default: false"
f.Default = false
return f
}).Bool()
}
SEARCH_ENABLE()
SEARCH_TIMEOUT = func() time.Duration {
return time.Duration(Config.Get("features.search.explore_timeout").Schema(func(f *FormElement) *FormElement {
if f == nil {
f = &FormElement{}
}
f.Name = "explore_timeout"
f.Type = "number"
f.Default = 300
f.Description = `When full text search is disabled, the search engine recursively explore
directories to find results. Exploration can't last longer than what is configured here`
f.Placeholder = fmt.Sprintf("Default: %dms", f.Default)
return f
}).Int()) * time.Millisecond
}
SEARCH_TIMEOUT()
SEARCH_PROCESS_MAX = func() int {
return Config.Get("features.search.process_max").Schema(func(f *FormElement) *FormElement {
if f == nil {
f = &FormElement{}
}
f.Id = "process_max"
f.Name = "process_max"
f.Type = "number"
f.Description = "Size of the pool containing the indexers"
f.Placeholder = "Default: 5"
f.Default = 5
return f
}).Int()
}
SEARCH_PROCESS_MAX()
SEARCH_PROCESS_PAR = func() int {
return Config.Get("features.search.process_par").Schema(func(f *FormElement) *FormElement {
if f == nil {
f = &FormElement{}
}
f.Id = "process_par"
f.Name = "process_par"
f.Type = "number"
f.Description = "How many concurrent indexers are running in the same time (requires a restart)"
f.Placeholder = "Default: 2"
f.Default = 2
return f
}).Int()
}
SEARCH_PROCESS_PAR()
SEARCH_REINDEX = func() int {
return Config.Get("features.search.reindex_time").Schema(func(f *FormElement) *FormElement {
if f == nil {
f = &FormElement{}
}
f.Id = "reindex_time"
f.Name = "reindex_time"
f.Type = "number"
f.Description = "Time in hours after which we consider our index to be stale and needs to be reindexed"
f.Placeholder = "Default: 24h"
f.Default = 24
return f
}).Int()
}
SEARCH_REINDEX()
CYCLE_TIME = func() int {
return Config.Get("features.search.cycle_time").Schema(func(f *FormElement) *FormElement {
if f == nil {
f = &FormElement{}
}
f.Id = "cycle_time"
f.Name = "cycle_time"
f.Type = "number"
f.Description = "Time the indexer needs to spend for each cycle in seconds (discovery, indexing and maintenance)"
f.Placeholder = "Default: 10s"
f.Default = 10
return f
}).Int()
}
CYCLE_TIME()
MAX_INDEXING_FSIZE = func() int {
return Config.Get("features.search.max_size").Schema(func(f *FormElement) *FormElement {
if f == nil {
f = &FormElement{}
}
f.Id = "max_size"
f.Name = "max_size"
f.Type = "number"
f.Description = "Maximum size of files the indexer will perform full text search"
f.Placeholder = "Default: 524288000 => 512MB"
f.Default = 524288000
return f
}).Int()
}
MAX_INDEXING_FSIZE()
INDEXING_EXT = func() string {
return Config.Get("features.search.indexer_ext").Schema(func(f *FormElement) *FormElement {
if f == nil {
f = &FormElement{}
}
f.Id = "indexer_ext"
f.Name = "indexer_ext"
f.Type = "string"
f.Description = "File extension we want to see indexed"
f.Placeholder = "Default: org,txt,docx,pdf,md,form"
f.Default = "org,txt,docx,pdf,md,form"
return f
}).String()
}
INDEXING_EXT()
onChange := Config.ListenForChange()
runner := func() {
startSearch := false
for {
if SEARCH_ENABLE() == false {
select {
case <-onChange.Listener:
startSearch = SEARCH_ENABLE()
}
if startSearch == false {
continue
}
}
sidx := SProc.Peek()
if sidx == nil {
time.Sleep(5 * time.Second)
continue
}
sidx.mu.Lock()
sidx.Execute()
sidx.mu.Unlock()
}
}
for i := 0; i < SEARCH_PROCESS_PAR(); i++ {
go runner()
}
}
func SearchStateful(app *App, path string, keyword string) []File {
var files []File = make([]File, 0)
// extract our search indexer
s := SProc.HintLs(app, path)
if s == nil {
return files
}
if path == "" {
path = "/"
}
rows, err := s.DB.Query(
"SELECT type, path, size, modTime FROM file WHERE path IN ("+
" SELECT path FROM file_index WHERE file_index MATCH ? AND path > ? AND path < ?"+
" ORDER BY rank LIMIT 2000"+
")",
regexp.MustCompile(`(\.|\-)`).ReplaceAllString(keyword, "\"$1\""),
path, path+"~",
)
if err != nil {
return files
}
defer rows.Close()
for rows.Next() {
f := File{}
var t string
if err = rows.Scan(&f.FType, &f.FPath, &f.FSize, &t); err != nil {
Log.Warning("search::find search_error (%v)", err)
return files
}
if tm, err := time.Parse(time.RFC3339, t); err == nil {
f.FTime = tm.Unix() * 1000
}
f.FName = filepath.Base(f.FPath)
files = append(files, f)
}
return files
}
type SearchProcess struct {
idx []SearchIndexer
n int
mu sync.RWMutex
}
func (this *SearchProcess) HintLs(app *App, path string) *SearchIndexer {
id := GenerateID(app)
// try to find the search indexer among the existing ones
this.mu.RLock()
for i := len(this.idx) - 1; i >= 0; i-- {
if id == this.idx[i].Id {
alreadyHasPath := false
for j := 0; j < len(this.idx[i].FoldersUnknown); j++ {
if this.idx[i].FoldersUnknown[j].Path == path {
alreadyHasPath = true
break
}
}
if alreadyHasPath == false {
heap.Push(&this.idx[i].FoldersUnknown, &Document{
Type: "directory",
Path: path,
InitialPath: path,
Name: filepath.Base(path),
})
}
ret := &this.idx[i]
this.mu.RUnlock()
return ret
}
}
this.mu.RUnlock()
// Having all indexers running in memory could be expensive => instead we're cycling a pool
search_process_max := SEARCH_PROCESS_MAX()
this.mu.Lock()
lenIdx := len(this.idx)
if lenIdx > 0 && search_process_max > 0 && lenIdx > (search_process_max-1) {
toDel := this.idx[0 : lenIdx-(search_process_max-1)]
for i := range toDel {
toDel[i].DB.Close()
}
this.idx = this.idx[lenIdx-(search_process_max-1):]
}
// instantiate the new indexer
s := NewSearchIndexer(id, app.Backend)
heap.Push(&s.FoldersUnknown, &Document{
Type: "directory",
Path: path,
InitialPath: path,
Name: filepath.Base(path),
})
this.idx = append(this.idx, s)
this.mu.Unlock()
return &s
}
func (this *SearchProcess) HintRm(app *App, path string) {
id := GenerateID(app)
this.mu.RLock()
for i := len(this.idx) - 1; i >= 0; i-- {
if id == this.idx[i].Id {
this.idx[i].DB.Exec("DELETE FROM file WHERE path >= ? AND path < ?", path, path+"~")
break
}
}
this.mu.RUnlock()
}
func (this *SearchProcess) HintFile(app *App, path string) {
id := GenerateID(app)
this.mu.RLock()
for i := len(this.idx) - 1; i >= 0; i-- {
if id == this.idx[i].Id {
this.idx[i].DB.Exec("UPDATE file set indexTime = NULL WHERE path = ?", path)
break
}
}
this.mu.RUnlock()
}
func (this *SearchProcess) Peek() *SearchIndexer {
if len(this.idx) == 0 {
return nil
}
this.mu.Lock()
if this.n >= len(this.idx)-1 || this.n < 0 {
this.n = 0
} else {
this.n = this.n + 1
}
s := &this.idx[this.n]
this.mu.Unlock()
return s
}
func (this *SearchProcess) Reset() {
this.mu.Lock()
for i := range this.idx {
this.idx[i].DB.Close()
}
this.idx = make([]SearchIndexer, 0)
this.mu.Unlock()
this.n = -1
}
type SearchIndexer struct {
Id string
FoldersUnknown HeapDoc
@ -811,49 +485,3 @@ func (this *SearchIndexer) dbDelete(parent string, f os.FileInfo, tx *sql.Tx) er
)
return err
}
type Document struct {
Hash string `json:"-"`
Type string `json:"type"`
Name string `json:"name"`
Path string `json:"path"`
InitialPath string `json:"-"`
Ext string `json:"ext"`
ModTime time.Time `json:"time"`
Size int64 `json:"size"`
Content []byte `json:"content"`
Priority int `json:"-"`
}
// https://golang.org/pkg/container/heap/
type HeapDoc []*Document
func (h HeapDoc) Len() int { return len(h) }
func (h HeapDoc) Less(i, j int) bool {
if h[i].Priority != 0 || h[j].Priority != 0 {
return h[i].Priority < h[j].Priority
}
scoreA := len(strings.Split(h[i].Path, "/")) / len(strings.Split(h[i].InitialPath, "/"))
scoreB := len(strings.Split(h[j].Path, "/")) / len(strings.Split(h[j].InitialPath, "/"))
return scoreA < scoreB
}
func (h HeapDoc) Swap(i, j int) {
a := h[i]
h[i] = h[j]
h[j] = a
}
func (h *HeapDoc) Push(x interface{}) {
if h.Len() < MAX_HEAP_SIZE {
*h = append(*h, x.(*Document))
}
}
func (h *HeapDoc) Pop() interface{} {
old := *h
n := len(old)
if n == 0 {
return nil
}
x := old[n-1]
*h = old[0 : n-1]
return x
}

View file

@ -0,0 +1,54 @@
package plg_search_sqlitefts
import (
"strings"
"time"
)
const MAX_HEAP_SIZE = 100000
type Document struct {
Hash string `json:"-"`
Type string `json:"type"`
Name string `json:"name"`
Path string `json:"path"`
InitialPath string `json:"-"`
Ext string `json:"ext"`
ModTime time.Time `json:"time"`
Size int64 `json:"size"`
Content []byte `json:"content"`
Priority int `json:"-"`
}
// https://golang.org/pkg/container/heap/
type HeapDoc []*Document
func (h HeapDoc) Len() int { return len(h) }
func (h HeapDoc) Less(i, j int) bool {
if h[i].Priority != 0 || h[j].Priority != 0 {
return h[i].Priority < h[j].Priority
}
scoreA := len(strings.Split(h[i].Path, "/")) / len(strings.Split(h[i].InitialPath, "/"))
scoreB := len(strings.Split(h[j].Path, "/")) / len(strings.Split(h[j].InitialPath, "/"))
return scoreA < scoreB
}
func (h HeapDoc) Swap(i, j int) {
a := h[i]
h[i] = h[j]
h[j] = a
}
func (h *HeapDoc) Push(x interface{}) {
if h.Len() < MAX_HEAP_SIZE {
*h = append(*h, x.(*Document))
}
}
func (h *HeapDoc) Pop() interface{} {
old := *h
n := len(old)
if n == 0 {
return nil
}
x := old[n-1]
*h = old[0 : n-1]
return x
}

View file

@ -0,0 +1,30 @@
package plg_search_stateless
import (
"fmt"
. "github.com/mickael-kerjean/filestash/server/common"
"time"
)
var (
SEARCH_TIMEOUT func() time.Duration
)
func init() {
SEARCH_TIMEOUT = func() time.Duration {
return time.Duration(Config.Get("features.search.explore_timeout").Schema(func(f *FormElement) *FormElement {
if f == nil {
f = &FormElement{}
}
f.Name = "explore_timeout"
f.Type = "number"
f.Default = 300
f.Description = `When full text search is disabled, the search engine recursively explore
directories to find results. Exploration can't last longer than what is configured here`
f.Placeholder = fmt.Sprintf("Default: %dms", f.Default)
return f
}).Int()) * time.Millisecond
}
SEARCH_TIMEOUT()
}

View file

@ -1,78 +1,30 @@
package model
package plg_search_stateless
import (
. "github.com/mickael-kerjean/filestash/server/common"
"os"
"path/filepath"
"strings"
"time"
)
func init() {
Hooks.Register.SearchEngine(StatelessSearch{})
}
// PathQuandidate (sic) is a directory queued for exploration, paired with the
// score that decides how soon it gets visited.
type PathQuandidate struct {
	Path  string
	Score int
}
func scoreBoostForPath(p string) int {
b := strings.ToLower(filepath.Base(p))
// StatelessSearch is a search engine that explores folders on the fly at
// query time instead of maintaining a prebuilt index (hence "stateless").
type StatelessSearch struct{}
// some path are garbage we don't want to explore unless there's nothing else to do
if b == "node_modules" {
return -100
} else if strings.HasPrefix(b, ".") {
return -10
}
// not all path are equally interesting, we bump the score of what we thing is interesting
score := 0
if strings.Contains(b, "document") {
score += 3
} else if strings.Contains(b, "project") {
score += 3
} else if strings.Contains(b, "home") {
score += 3
} else if strings.Contains(b, "note") {
score += 3
}
return score
}
func scoreBoostForFilesInDirectory(f []os.FileInfo) int {
s := 0
for i := 0; i < len(f); i++ {
name := f[i].Name()
if f[i].IsDir() == false {
if strings.HasSuffix(name, ".org") {
s += 2
} else if strings.HasSuffix(name, ".pdf") {
s += 1
} else if strings.HasSuffix(name, ".doc") || strings.HasSuffix(name, ".docx") {
s += 1
} else if strings.HasSuffix(name, ".md") {
s += 1
} else if strings.HasSuffix(name, ".pdf") {
s += 1
}
}
if s > 4 {
return 4
}
}
return s
}
func scoreBoostOnDepth(p string) int {
return -strings.Count(p, "/")
}
func SearchStateLess(app *App, path string, keyword string) []File {
files := make([]File, 0)
func (this StatelessSearch) Query(app App, path string, keyword string) ([]IFile, error) {
files := make([]IFile, 0)
toVisit := []PathQuandidate{PathQuandidate{path, 0}}
MAX_SEARCH_TIME := SEARCH_TIMEOUT()
for start := time.Now(); time.Since(start) < MAX_SEARCH_TIME; {
if len(toVisit) == 0 {
return files
return files, nil
}
currentPath := toVisit[0]
if len(toVisit) == 0 {
@ -140,5 +92,5 @@ func SearchStateLess(app *App, path string, keyword string) []File {
}
}
}
return files
return files, nil
}

View file

@ -0,0 +1,59 @@
package plg_search_stateless
import (
"os"
"path/filepath"
"strings"
)
// scoreBoostForPath rates how promising a path is for exploration based only
// on its lowercased base name: known-garbage locations get a heavy penalty so
// they are visited last, while names hinting at user content get a bump.
func scoreBoostForPath(p string) int {
	base := strings.ToLower(filepath.Base(p))
	// Garbage we don't want to explore unless there's nothing else to do.
	switch {
	case base == "node_modules":
		return -100
	case strings.HasPrefix(base, "."):
		return -10
	}
	// Names that suggest interesting content are worth visiting earlier.
	for _, hint := range []string{"document", "project", "home", "note"} {
		if strings.Contains(base, hint) {
			return 3
		}
	}
	return 0
}
// scoreBoostForFilesInDirectory rates a directory listing by the document
// formats it contains: org files score highest, then pdf/doc/docx/md. The
// boost is capped at 4 so one folder full of documents cannot dominate.
//
// Fix: the original chain tested ".pdf" twice — the second branch was dead
// code and has been removed; scoring is otherwise unchanged.
func scoreBoostForFilesInDirectory(f []os.FileInfo) int {
	s := 0
	for i := 0; i < len(f); i++ {
		name := f[i].Name()
		if f[i].IsDir() == false {
			switch {
			case strings.HasSuffix(name, ".org"):
				s += 2
			case strings.HasSuffix(name, ".pdf"),
				strings.HasSuffix(name, ".doc"),
				strings.HasSuffix(name, ".docx"),
				strings.HasSuffix(name, ".md"):
				s += 1
			}
		}
		if s > 4 {
			return 4
		}
	}
	return s
}
// scoreBoostOnDepth penalizes deep paths: one point lost per "/" separator,
// so shallow entries are explored before deeply nested ones.
func scoreBoostOnDepth(p string) int {
	depth := strings.Count(p, "/")
	return 0 - depth
}

View file

@ -1,68 +0,0 @@
package plg_security_killswitch
/*
* This package was made after the log4j CVE to have a way to remotly kill an instance if something
* terrible were to happen.
*/
import (
"encoding/json"
"fmt"
. "github.com/mickael-kerjean/filestash/server/common"
"net/http"
"os"
"time"
)
// init engages the killswitch: one check at startup, then a background
// goroutine re-checking periodically for the lifetime of the process.
func init() {
	Log.Debug("Killswitch enabled")
	main()
	go func() {
		for range time.Tick(time.Second * 1800) { // every 30 minutes
			main()
		}
	}()
}
// main performs one killswitch round-trip: it reports the running version and
// configured host to downloads.filestash.app and applies whatever action the
// server replies with. Every failure path returns silently by design, so a
// broken or unreachable endpoint can never take the instance down by itself.
func main() {
	req, err := http.NewRequest(
		"GET",
		fmt.Sprintf(
			"https://downloads.filestash.app/api/killswitch.php?version=%s&host=%s",
			APP_VERSION+"."+BUILD_DATE,
			Config.Get("general.host").String(),
		),
		nil,
	)
	if err != nil {
		return
	}
	res, err := HTTPClient.Do(req)
	if err != nil {
		return
	}
	defer res.Body.Close()
	if res.StatusCode != http.StatusOK {
		return
	}
	// Expected payload, e.g. {"status":"ok","action":"EXIT","message":"..."}.
	d := struct {
		Status  string `json:"status"`
		Action  string `json:"action"`
		Message string `json:"message"`
	}{}
	if err = json.NewDecoder(res.Body).Decode(&d); err != nil {
		return
	}
	if d.Status != "ok" {
		return
	}
	// The server can either terminate the instance ("EXIT") or merely push an
	// informational message into the logs.
	switch d.Action {
	case "EXIT":
		Log.Warning("REMOTE KILLSWITCH ENGAGED - %s", d.Message)
		os.Exit(1)
	default:
		if d.Message != "" {
			Log.Info("REMOTE MESSAGE - %s", d.Message)
		}
	}
}