mirror of https://github.com/mickael-kerjean/filestash
synced 2025-12-16 05:18:38 +01:00

commit 0127fa2c02 (parent e729feb43c)
feature (search): search feature

19 changed files with 633 additions and 64 deletions
Makefile (4 changes)

@@ -1,8 +1,8 @@
 build_frontend:
 	NODE_ENV=production npm run build
 
-build_backend:
-	PKG_CONFIG_PATH=/usr/local/lib/pkgconfig/ CGO_CFLAGS_ALLOW='-fopenmp' go build -ldflags "-X github.com/mickael-kerjean/filestash/server/common.BUILD_NUMBER=`date -u +%Y%m%d`" -o dist/filestash server/main.go
+build_backend:
+	PKG_CONFIG_PATH=/usr/local/lib/pkgconfig/ CGO_CFLAGS_ALLOW='-fopenmp' go build --tags "fts5" -ldflags "-X github.com/mickael-kerjean/filestash/server/common.BUILD_NUMBER=`date -u +%Y%m%d`" -o dist/filestash server/main.go
 
 build_plugins:
	go build -buildmode=plugin -o ./dist/data/plugin/image.so server/plugin/plg_image_light/index.go
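The only substantive change here is the new `--tags "fts5"` flag on `go build`: github.com/mattn/go-sqlite3 compiles its bundled SQLite without the FTS5 extension unless that build tag is set, and the search index introduced in server/model/search.go relies on `CREATE VIRTUAL TABLE ... USING fts5`. A minimal probe program (mine, not part of the commit) to confirm a build has FTS5 compiled in:

// probe_fts5.go -- hypothetical helper, not in this commit.
// Run with `go run --tags "fts5" probe_fts5.go`; without the tag,
// the Exec below fails with "no such module: fts5".
package main

import (
	"database/sql"
	"log"

	_ "github.com/mattn/go-sqlite3"
)

func main() {
	db, err := sql.Open("sqlite3", ":memory:")
	if err != nil {
		log.Fatal(err)
	}
	defer db.Close()
	if _, err := db.Exec("CREATE VIRTUAL TABLE t USING fts5(content);"); err != nil {
		log.Fatalf("FTS5 missing: %v", err)
	}
	log.Println("FTS5 available")
}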
@@ -368,6 +368,13 @@ class FileSystem{
         });
     }
 
+    search(keyword, path = "/"){
+        const url = appendShareToUrl("/api/files/search?path="+prepare(path)+"&q="+encodeURIComponent(keyword))
+        return http_get(url).then((res) => {
+            return res.results
+        });
+    }
+
     frequents(){
         let data = [];
         return cache.fetchAll((value) => {
@@ -390,15 +390,24 @@ export const onUpload = function(path, e){
 
 const worker = new Worker();
 export const onSearch = (keyword, path = "/") => {
-    worker.postMessage({
-        action: "search::find",
-        path: path,
-        share: currentShare(),
-        keyword: keyword
-    });
+    if(navigator.onLine == false){
+        notify.send("Result aren't complete because you're not online", "info");
+        worker.postMessage({
+            action: "search::find",
+            path: path,
+            share: currentShare(),
+            keyword: keyword
+        });
         return new Observable((obs) => {
             worker.onmessage = (m) => {
                 if(m.data.type === "search::found"){
                     obs.next(m.data && m.data.files || []);
                 }
             };
         });
+    }
 
+    return new Observable((obs) => {
+        worker.onmessage = (m) => {
+            obs.next(m.data);
+        };
+        Files.search(keyword, path).then((f) => obs.next(f))
+    });
 };
@@ -200,21 +200,18 @@ export class FilesPage extends React.Component {
         if(search.length < 2){
             return;
         }
 
         if(this._search){
             this._search.unsubscribe();
         }
 
-        this._search = onSearch(search, this.state.path).subscribe((message) => {
-            if(message.type === "search::found"){
-                this.setState({
-                    files: message.files || [],
-                    metadata: {
-                        can_rename: false,
-                        can_delete: false
-                    }
-                });
-            }
+        this._search = onSearch(search, this.state.path).subscribe((f) => {
+            this.setState({
+                files: f || [],
+                metadata: {
+                    can_rename: false,
+                    can_delete: false,
+                    can_share: false
+                }
+            });
         });
     }
 
@@ -39,7 +39,7 @@ export class FileSystem extends React.PureComponent {
                 <p className="empty_image">
                     <Icon name="file"/>
                 </p>
-                <p>This folder is empty</p>
+                <p>There is nothing here</p>
             </NgIf>
         </Container>
     </div>
@@ -15,7 +15,7 @@ export class NewThing extends React.Component {
         type: null,
         message: null,
         icon: null,
-        search_enabled: "ServiceWorker" in window ? true : false,
+        search_enabled: CONFIG.enable_search || false,
         search_input_visible: false,
         search_keyword: ""
     };
@@ -79,7 +79,7 @@ func NewQuickCache(arg ...time.Duration) AppCache {
 
 type KeyValueStore struct {
 	cache map[string]interface{}
-	sync.RWMutex
+	sync.Mutex
 }
 
 func NewKeyValueStore() KeyValueStore {
@@ -87,15 +87,16 @@ func NewKeyValueStore() KeyValueStore {
 }
 
 func (this KeyValueStore) Get(key string) interface{} {
-	this.RLock()
-	defer this.RUnlock()
-	return this.cache[key]
+	this.Lock()
+	val := this.cache[key]
+	this.Unlock()
+	return val
 }
 
 func (this *KeyValueStore) Set(key string, value interface{}) {
 	this.Lock()
-	defer this.Unlock()
 	this.cache[key] = value
+	this.Unlock()
 }
 
 func (this *KeyValueStore) Clear() {
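A note on the locking change above: the store swaps its embedded `sync.RWMutex` for a plain `sync.Mutex`, and the deferred unlocks become explicit ones, so the value is copied out before the lock is released. A self-contained sketch of the before/after pattern (the type and names are illustrative only; the commit's `Get` keeps a value receiver, which copies the embedded mutex on each call, so this sketch uses pointer receivers instead):

package main

import "sync"

type kv struct {
	sync.Mutex
	cache map[string]interface{}
}

// before: unlock deferred until the function returns
// (with the embedded RWMutex this was RLock/RUnlock)
func (s *kv) getDefer(key string) interface{} {
	s.Lock()
	defer s.Unlock()
	return s.cache[key]
}

// after: copy the value out, release the lock, then return
func (s *kv) getExplicit(key string) interface{} {
	s.Lock()
	val := s.cache[key]
	s.Unlock()
	return val
}

func main() {
	s := &kv{cache: map[string]interface{}{"k": 1}}
	_ = s.getDefer("k")
	_ = s.getExplicit("k")
}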
@@ -78,12 +78,6 @@ func NewConfiguration() Configuration {
 		Form{
 			Title: "features",
 			Form: []Form{
-				Form{
-					Title: "search",
-					Elmnts: []FormElement{
-						FormElement{Name: "enable", Type: "boolean", Default: true, Description: "Enable/Disable the search feature"},
-					},
-				},
 				Form{
 					Title: "share",
 					Elmnts: []FormElement{
@@ -1,11 +1,18 @@
 package common
 
+import (
+	"os"
+	"path/filepath"
+)
+
 const (
 	APP_VERSION = "v0.4"
-	CONFIG_PATH = "data/config/"
 	PLUGIN_PATH = "data/plugin/"
-	LOG_PATH    = "data/log/"
-	TMP_PATH    = "data/tmp/"
+	LOG_PATH    = "data/state/log/"
+	CONFIG_PATH = "data/state/config/"
+	DB_PATH     = "data/state/db/"
+	FTS_PATH    = "data/state/db/search/"
+	TMP_PATH    = "data/cache/tmp/"
 	COOKIE_NAME_AUTH  = "auth"
 	COOKIE_NAME_PROOF = "proof"
 	COOKIE_NAME_ADMIN = "admin"
@@ -16,6 +23,15 @@ const (
 	URL_SETUP = "/admin/setup"
 )
 
+func init(){
+	os.MkdirAll(filepath.Join(GetCurrentDir(), LOG_PATH), os.ModePerm)
+	os.MkdirAll(filepath.Join(GetCurrentDir(), FTS_PATH), os.ModePerm)
+	os.MkdirAll(filepath.Join(GetCurrentDir(), CONFIG_PATH), os.ModePerm)
+	os.RemoveAll(filepath.Join(GetCurrentDir(), TMP_PATH))
+	os.MkdirAll(filepath.Join(GetCurrentDir(), TMP_PATH), os.ModePerm)
+}
+
+
 var (
 	BUILD_NUMBER string
 	SECRET_KEY   string
@@ -24,7 +40,6 @@
 	SECRET_KEY_DERIVATE_FOR_USER string
 )
 
-
 /*
  * Improve security by calculating derivative of the secret key to restrict the attack surface
  * in the worst case scenario with one compromise secret key
@@ -13,7 +13,6 @@ var logfile *os.File
 func init(){
 	var err error
 	logPath := filepath.Join(GetCurrentDir(), LOG_PATH)
-	os.MkdirAll(logPath, os.ModePerm)
 	logfile, err = os.OpenFile(filepath.Join(logPath, "access.log"), os.O_APPEND|os.O_WRONLY|os.O_CREATE, os.ModePerm)
 	if err != nil {
 		slog.Printf("ERROR log file: %+v", err)
@@ -23,7 +23,8 @@ type File struct {
 	FName string `json:"name"`
 	FType string `json:"type"`
 	FTime int64  `json:"time"`
-	FSize int64  `json:"size"`
+	FSize int64  `json:"size"`
+	FPath string `json:"path,omitempty"`
 	CanRename *bool `json:"can_rename,omitempty"`
 	CanMove   *bool `json:"can_move_directory,omitempty"`
 	CanDelete *bool `json:"can_delete,omitempty"`
@@ -12,7 +12,7 @@ import (
 
 var (
 	logpath    = filepath.Join(GetCurrentDir(), LOG_PATH, "access.log")
-	cachepath  = filepath.Join(GetCurrentDir(), CONFIG_PATH, "config.json")
+	configpath = filepath.Join(GetCurrentDir(), CONFIG_PATH, "config.json")
 	pluginpath = filepath.Join(GetCurrentDir(), PLUGIN_PATH)
 )
 
@@ -73,7 +73,7 @@ func PrivateConfigHandler(ctx App, res http.ResponseWriter, req *http.Request) {
 func PrivateConfigUpdateHandler(ctx App, res http.ResponseWriter, req *http.Request) {
 	b, _ := ioutil.ReadAll(req.Body)
 	b = PrettyPrint(b)
-	file, err := os.Create(cachepath)
+	file, err := os.Create(configpath)
 	if err != nil {
 		SendErrorResult(res, err)
 		return
@@ -14,13 +14,6 @@ import (
 	"strings"
 )
 
-
-var EXPORT_PATH string
-func init() {
-	EXPORT_PATH = GetAbsolutePath(TMP_PATH)
-	os.RemoveAll(EXPORT_PATH)
-	os.MkdirAll(EXPORT_PATH, os.ModePerm)
-}
 func FileExport(ctx App, res http.ResponseWriter, req *http.Request) {
 	http.SetCookie(res, &http.Cookie{
 		Name: "download",
@@ -41,7 +34,7 @@ func FileExport(ctx App, res http.ResponseWriter, req *http.Request) {
 		return
 	}
 
-	var tmpPath string = EXPORT_PATH + "/export_" + QuickString(10)
+	var tmpPath string = GetAbsolutePath(TMP_PATH) + "/export_" + QuickString(10)
 	var cmd *exec.Cmd
 	var emacsPath string
 	var outPath string
@@ -22,15 +22,11 @@ type FileInfo struct {
 	Time int64 `json:"time"`
 }
 
-const FileCachePath = "data/cache/tmp/"
-
 var FileCache AppCache
 
 func init() {
 	FileCache = NewAppCache()
-	cachePath := filepath.Join(GetCurrentDir(), FileCachePath)
-	os.RemoveAll(cachePath)
-	os.MkdirAll(cachePath, os.ModePerm)
+	cachePath := filepath.Join(GetCurrentDir(), TMP_PATH)
 	FileCache.OnEvict(func(key string, value interface{}) {
 		os.RemoveAll(filepath.Join(cachePath, key))
 	})
@@ -39,7 +35,7 @@ func init() {
 func FileLs(ctx App, res http.ResponseWriter, req *http.Request) {
 	if model.CanRead(&ctx) == false {
+		if model.CanUpload(&ctx) == false {
-			SendErrorResult(res, NewError("Permission denied", 403))
+			SendErrorResult(res, ErrPermissionDenied)
 			return
 		}
 		SendSuccessResults(res, make([]FileInfo, 0))
@@ -50,6 +46,7 @@ func FileLs(ctx App, res http.ResponseWriter, req *http.Request) {
 		SendErrorResult(res, err)
 		return
 	}
+	model.SProc.Append(&ctx, path) // ping the search indexer
 
 	entries, err := ctx.Backend.Ls(path)
 	if err != nil {
@@ -179,7 +176,7 @@ func FileCat(ctx App, res http.ResponseWriter, req *http.Request) {
 			}
 		}
 	} else {
-		tmpPath := filepath.Join(GetCurrentDir(), FileCachePath, "file_" + QuickString(20) + ".dat")
+		tmpPath := filepath.Join(GetCurrentDir(), filepath.Join(GetCurrentDir(), TMP_PATH), "file_" + QuickString(20) + ".dat")
 		f, err := os.OpenFile(tmpPath, os.O_RDWR|os.O_CREATE, os.ModePerm);
 		if err != nil {
 			SendErrorResult(res, err)
server/ctrl/search.go (new file, 25 additions)

@@ -0,0 +1,25 @@
package ctrl

import (
	. "github.com/mickael-kerjean/filestash/server/common"
	"github.com/mickael-kerjean/filestash/server/model"
	"net/http"
)

func FileSearch(ctx App, res http.ResponseWriter, req *http.Request) {
	if Config.Get("features.search.enable").Bool() == false {
		SendErrorResult(res, ErrNotAllowed)
		return
	}

	path, err := pathBuilder(ctx, req.URL.Query().Get("path"))
	if err != nil {
		path = "/"
	}
	q := req.URL.Query().Get("q")
	if model.CanRead(&ctx) == false {
		SendErrorResult(res, ErrPermissionDenied)
		return
	}
	SendSuccessResults(res, model.Search(&ctx, path, q))
}
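For reference, a sketch of how a client could exercise this new route once authenticated. The host, port and cookie value below are assumptions (8334 is Filestash's usual default port), not something this commit defines; the response shape matches what the frontend reads via `res.results`:

package main

import (
	"fmt"
	"io/ioutil"
	"net/http"
	"net/url"
)

func main() {
	q := url.Values{}
	q.Set("path", "/documents/")
	q.Set("q", "report")
	req, err := http.NewRequest("GET", "http://127.0.0.1:8334/api/files/search?"+q.Encode(), nil)
	if err != nil {
		panic(err)
	}
	// COOKIE_NAME_AUTH ("auth") from server/common; the value is a placeholder
	req.AddCookie(&http.Cookie{Name: "auth", Value: "<session>"})
	res, err := http.DefaultClient.Do(req)
	if err != nil {
		panic(err)
	}
	defer res.Body.Close()
	body, _ := ioutil.ReadAll(res.Body)
	fmt.Println(string(body)) // e.g. {"status":"ok","results":[...]}
}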
@@ -85,7 +85,7 @@ func SessionAuthenticate(ctx App, res http.ResponseWriter, req *http.Request) {
 		HttpOnly: true,
 		SameSite: http.SameSiteStrictMode,
 	}
-	http.SetCookie(res, &cookie)
+	http.SetCookie(res, &cookie)
 
 	if home == "" {
 		SendSuccessResult(res, nil)
@@ -77,6 +77,8 @@ func Init(a *App) {
 	files.HandleFunc("/rm", NewMiddlewareChain(FileRm, middlewares, *a)).Methods("GET")
 	files.HandleFunc("/mkdir", NewMiddlewareChain(FileMkdir, middlewares, *a)).Methods("GET")
 	files.HandleFunc("/touch", NewMiddlewareChain(FileTouch, middlewares, *a)).Methods("GET")
+	middlewares = []Middleware{ ApiHeaders, SessionStart, LoggedInOnly }
+	files.HandleFunc("/search", NewMiddlewareChain(FileSearch, middlewares, *a)).Methods("GET")
 
 	// API for exporter
 	middlewares = []Middleware{ ApiHeaders, SecureHeaders, RedirectSharedLoginIfNeeded, SessionStart, LoggedInOnly }
@@ -11,13 +11,11 @@ import (
 
 var DB *sql.DB
 
-const DBCachePath = "data/"
-
 func init() {
-	cachePath := filepath.Join(GetCurrentDir(), DBCachePath)
+	cachePath := filepath.Join(GetCurrentDir(), DB_PATH)
 	os.MkdirAll(cachePath, os.ModePerm)
 	var err error
-	if DB, err = sql.Open("sqlite3", cachePath+"/db.sql?_fk=true"); err != nil {
+	if DB, err = sql.Open("sqlite3", cachePath+"/share.sql?_fk=true"); err != nil {
 		return
 	}
 
server/model/search.go (new file, 531 additions)

@@ -0,0 +1,531 @@
package model

import (
	"container/heap"
	"database/sql"
	"encoding/base64"
	"github.com/mattn/go-sqlite3"
	. "github.com/mickael-kerjean/filestash/server/common"
	"hash/fnv"
	"math/rand"
	"path/filepath"
	"regexp"
	"strconv"
	"strings"
	"sync"
	"time"
)

const (
	PHASE_EXPLORE  = "PHASE_EXPLORE"
	PHASE_INDEXING = "PHASE_INDEXING"
	PHASE_MAINTAIN = "PHASE_MAINTAIN"
)
var (
	SEARCH_ENABLE      func() bool
	SEARCH_PROCESS_MAX func() int
	SEARCH_PROCESS_PAR func() int
	SEARCH_REINDEX     func() int
	CYCLE_TIME         func() int
	MAX_INDEXING_FSIZE func() int
	INDEXING_EXT       func() string
)

var SProc SearchProcess = SearchProcess{
	idx: make([]SearchIndexer, 0),
	n:   -1,
}

func init(){
	SEARCH_ENABLE = func() bool {
		return Config.Get("features.search.enable").Schema(func(f *FormElement) *FormElement {
			if f == nil {
				f = &FormElement{}
			}
			f.Name = "enable"
			f.Type = "enable"
			f.Target = []string{"process_max", "process_par", "reindex_time", "cycle_time", "max_size", "indexer_ext"}
			f.Description = "Enable/Disable the search feature"
			f.Placeholder = "Default: false"
			f.Default = false
			return f
		}).Bool()
	}
	SEARCH_ENABLE()
	SEARCH_PROCESS_MAX = func() int {
		return Config.Get("features.search.process_max").Schema(func(f *FormElement) *FormElement {
			if f == nil {
				f = &FormElement{}
			}
			f.Id = "process_max"
			f.Name = "process_max"
			f.Type = "number"
			f.Description = "Size of the pool containing the indexers"
			f.Placeholder = "Default: 5"
			f.Default = 5
			return f
		}).Int()
	}
	SEARCH_PROCESS_MAX()
	SEARCH_PROCESS_PAR = func() int {
		return 1
		return Config.Get("features.search.process_par").Schema(func(f *FormElement) *FormElement {
			if f == nil {
				f = &FormElement{}
			}
			f.Id = "process_par"
			f.Name = "process_par"
			f.Type = "number"
			f.Description = "How many concurrent indexers are running in the same time (requires a restart)"
			f.Placeholder = "Default: 2"
			f.Default = 2
			return f
		}).Int()
	}
	SEARCH_PROCESS_PAR()
	SEARCH_REINDEX = func() int {
		return Config.Get("features.search.reindex_time").Schema(func(f *FormElement) *FormElement {
			if f == nil {
				f = &FormElement{}
			}
			f.Id = "reindex_time"
			f.Name = "reindex_time"
			f.Type = "number"
			f.Description = "Time in hours after which we consider our index to be stale and needs to be reindexed"
			f.Placeholder = "Default: 24h"
			f.Default = 24
			return f
		}).Int()
	}
	SEARCH_REINDEX()
	CYCLE_TIME = func() int {
		return Config.Get("features.search.cycle_time").Schema(func(f *FormElement) *FormElement {
			if f == nil {
				f = &FormElement{}
			}
			f.Id = "cycle_time"
			f.Name = "cycle_time"
			f.Type = "number"
			f.Description = "Time the indexer needs to spend for each cycle in seconds (discovery, indexing and maintenance)"
			f.Placeholder = "Default: 10s"
			f.Default = 10
			return f
		}).Int()
	}
	CYCLE_TIME()
	MAX_INDEXING_FSIZE = func() int {
		return Config.Get("features.search.max_size").Schema(func(f *FormElement) *FormElement {
			if f == nil {
				f = &FormElement{}
			}
			f.Id = "max_size"
			f.Name = "max_size"
			f.Type = "number"
			f.Description = "Maximum size of files the indexer will perform full text search"
			f.Placeholder = "Default: 524288000 => 512MB"
			f.Default = 524288000
			return f
		}).Int()
	}
	MAX_INDEXING_FSIZE()
	INDEXING_EXT = func() string {
		return Config.Get("features.search.indexer_ext").Schema(func(f *FormElement) *FormElement {
			if f == nil {
				f = &FormElement{}
			}
			f.Id = "indexer_ext"
			f.Name = "indexer_ext"
			f.Type = "string"
			f.Description = "File extension we want to see indexed"
			f.Placeholder = "Default: org,txt,docx,pdf,md"
			f.Default = "/"
			return f
		}).String()
	}
	INDEXING_EXT()

	runner := func() {
		for {
			if SEARCH_ENABLE() == false {
				time.Sleep(60 * time.Second)
				continue
			}
			sidx := SProc.Peek()
			if sidx == nil {
				time.Sleep(5 * time.Second)
				continue
			} else if sidx.FoldersUnknown.Len() == 0 {
				time.Sleep(5 * time.Second)
				continue
			}
			sidx.mu.Lock()
			sidx.Execute()
			sidx.mu.Unlock()
		}
	}
	for i:=0; i<SEARCH_PROCESS_PAR(); i++ {
		go runner()
	}
}

func Search(app *App, path string, keyword string) []File {
	var files []File = make([]File, 0)

	// extract our search indexer
	s := SProc.Append(app, path)
	if s == nil {
		return files
	}

	if path == "" {
		path = "/"
	}

	rows, err := s.db.Query(
		"SELECT type, path, size, modTime FROM file WHERE path IN (SELECT path FROM file_index WHERE file_index MATCH ? AND path >= ? AND path < ? ORDER BY rank LIMIT 2000)",
		regexp.MustCompile(`(\.|\-)`).ReplaceAllString(keyword, "\"$1\""),
		path, path + "~",
	)
	if err != nil {
		return files
	}
	for rows.Next() {
		f := File{}
		var t string
		if err = rows.Scan(&f.FType, &f.FPath, &f.FSize, &t); err != nil {
			Log.Warning("search::find search_error (%v)", err)
			return files
		}
		if tm, err := time.Parse(time.RFC3339, t); err == nil {
			f.FTime = tm.Unix() * 1000
		}
		f.FName = filepath.Base(f.FPath)
		files = append(files, f)
	}
	return files
}

type SearchProcess struct {
	idx []SearchIndexer
	n   int
	mu  sync.Mutex
}

func(this *SearchProcess) Append(app *App, path string) *SearchIndexer {
	id := GenerateID(app)
	this.mu.Lock()
	defer this.mu.Unlock()

	// try to find the search indexer among the existing ones
	for i:=len(this.idx)-1; i>=0; i-- {
		if id == this.idx[i].Id {
			alreadyHasPath := false
			for j:=0; j<len(this.idx[i].FoldersUnknown); j++ {
				if this.idx[i].FoldersUnknown[j].Path == path {
					alreadyHasPath = true
					break
				}
			}
			if alreadyHasPath == false {
				heap.Push(&this.idx[i].FoldersUnknown, &Document{
					Type: "directory",
					Path: path,
					InitialPath: path,
					Name: filepath.Base(path),
				})
			}
			return &this.idx[i]
		}
	}

	// Having all indexers running in memory could be expensive => instead we're cycling a pool
	search_process_max := 2//SEARCH_PROCESS_MAX()
	if len(this.idx) > ( search_process_max - 1) {
		toDel := this.idx[0 : len(this.idx) - ( search_process_max - 1)]
		for i := range toDel {
			toDel[i].db.Close()
		}
		this.idx = this.idx[len(this.idx) - ( search_process_max - 1) :]
	}

	// instantiate the new indexer
	s := NewSearchIndexer(id, app.Backend)
	heap.Push(&s.FoldersUnknown, &Document{
		Type: "directory",
		Path: path,
		InitialPath: path,
		Name: filepath.Base(path),
	})
	this.idx = append(this.idx, s)
	return &s
}

func(this *SearchProcess) Peek() *SearchIndexer {
	if len(this.idx) == 0 {
		return nil
	}
	this.mu.Lock()
	if this.n >= len(this.idx) - 1 || this.n < 0 {
		this.n = 0
	} else {
		this.n = this.n + 1
	}
	s := &this.idx[this.n]
	this.mu.Unlock()
	return s
}


type SearchIndexer struct {
	Id             string
	FoldersUnknown HeapDoc
	FilesUnknown   HeapDoc
	Backend        IBackend
	db             *sql.DB
	mu             sync.Mutex
}

func NewSearchIndexer(id string, b IBackend) SearchIndexer {
	s := SearchIndexer {
		Id: id,
		Backend: b,
		FoldersUnknown: make(HeapDoc, 0, 1),
		FilesUnknown: make(HeapDoc, 0, 1),
	}
	heap.Init(&s.FoldersUnknown)
	heap.Init(&s.FilesUnknown)

	db, err := sql.Open("sqlite3", filepath.Join(GetCurrentDir(), FTS_PATH, "fts_" + id + ".sql"))
	if err != nil {
		Log.Warning("search::init can't open database (%v)", err)
		return s
	}
	queryDB := func(sqlQuery string) error {
		stmt, err := db.Prepare(sqlQuery);
		if err != nil {
			Log.Warning("search::initschema prepare schema error(%v)", err)
			return err
		}
		_, err = stmt.Exec()
		if err != nil {
			Log.Warning("search::initschema execute error(%v)", err)
			return err
		}
		return err
	}
	if queryDB("CREATE TABLE IF NOT EXISTS file(path VARCHAR(1024) PRIMARY KEY, filename VARCHAR(64), filetype VARCHAR(16), type VARCHAR(16), size INTEGER, modTime timestamp, indexTime timestamp DEFAULT NULL);"); err != nil {
		return s
	}
	if queryDB("CREATE VIRTUAL TABLE IF NOT EXISTS file_index USING fts5(path UNINDEXED, filename, filetype, content);"); err != nil {
		return s
	}
	if queryDB("CREATE TRIGGER IF NOT EXISTS after_file_insert AFTER INSERT ON file BEGIN INSERT INTO file_index (path, filename, filetype) VALUES(new.path, new.filename, new.filetype); END;"); err != nil {
		return s
	}
	if queryDB("CREATE TRIGGER IF NOT EXISTS after_file_delete AFTER DELETE ON file BEGIN DELETE FROM file_index WHERE path = old.path; END;"); err != nil {
		return s
	}
	if queryDB("CREATE TRIGGER IF NOT EXISTS after_file_update_path UPDATE OF path ON file BEGIN UPDATE file_index SET path = new.path, filepath = new.filepath, filetype = new.filetype WHERE path = old.path; END;"); err != nil {
		return s
	}
	s.db = db
	return s
}

func(this *SearchIndexer) Execute(){
	currentPhase := func() string {
		if len(this.FoldersUnknown) != 0 {
			return PHASE_EXPLORE
		}
		if len(this.FilesUnknown) != 0 {
			return PHASE_INDEXING
		}
		return PHASE_MAINTAIN
	}()
	cycleExecute := func(fn func() bool) {
		stopTime := time.Now().Add(time.Duration(CYCLE_TIME()) * time.Second)
		for {
			if fn() == false {
				break
			}
			if stopTime.After(time.Now()) == false {
				break
			}
		}
	}
	if currentPhase == PHASE_EXPLORE {
		cycleExecute(this.Discover)
		return
	} else if currentPhase == PHASE_INDEXING {
		r := rand.Intn(100)
		if r < 30 {
			cycleExecute(this.Bookkeeping)
			return
		}
		cycleExecute(this.Indexing)
		return
	} else if currentPhase == PHASE_MAINTAIN {
		cycleExecute(this.Bookkeeping)
		return
	}
	return
}

func(this *SearchIndexer) Discover() bool {
	if this.FoldersUnknown.Len() == 0 {
		return false
	}
	doc := heap.Pop(&this.FoldersUnknown).(*Document)
	if doc == nil {
		return false
	}
	files, err := this.Backend.Ls(doc.Path)
	if err != nil {
		return true
	}
	if len(files) == 0 {
		return true
	}

	// We don't want our indexer to go wild and diverge over time. As such we need to detect those edge cases: aka
	// recursive folder structure. Our detection is relying on a Hash of []os.FileInfo
	hashFiles := func() string {
		var step int = len(files) / 50
		if step == 0 {
			step = 1
		}
		hasher := fnv.New32()
		hasher.Write([]byte(strconv.Itoa(len(files))))
		for i:=0; i<len(files); i = i+step {
			hasher.Write([]byte(files[i].Name()))
		}
		return base64.StdEncoding.EncodeToString(hasher.Sum(nil))
	}()
	for i:=0; i<this.FoldersUnknown.Len(); i++ {
		if this.FoldersUnknown[i].Hash == hashFiles && filepath.Base(doc.Path) != filepath.Base(this.FoldersUnknown[i].Path) {
			return true
		}
	}

	// Insert the newly found data within our index
	tx, _ := this.db.Begin()
	tx.Exec("BEGIN EXCLUSIVE TRANSACTION;")
	for i := range files {
		f := files[i]
		name := f.Name()
		p := filepath.Join(doc.Path, name)
		if f.IsDir() {
			p += "/"
			_, err = tx.Exec(
				"INSERT INTO file(path, filename, type, size, modTime, indexTime) VALUES(?, ?, ?, ?, ?, ?)",
				p,
				name,
				"directory",
				f.Size(),
				f.ModTime(),
				time.Now(),
			);
			var performPush bool = false
			if err == nil {
				performPush = true
			} else if e, ok := err.(sqlite3.Error); ok && e.Code == sqlite3.ErrConstraint {
				performPush = func(path string) bool{
					var t string
					var err error
					if err := tx.QueryRow("SELECT indexTime FROM file WHERE path = ?", p).Scan(&t); err != nil {
						Log.Warning("search::discovery unknown_path (%v)", err)
						return false
					}
					tm, err := time.Parse(time.RFC3339, t);
					if err != nil {
						Log.Warning("search::discovery invalid_time (%v)", err)
						return false
					}
					if time.Now().Add(time.Duration(- SEARCH_REINDEX()) * time.Hour).Before(tm) {
						return false
					}
					if _, err = tx.Exec("UPDATE file SET indexTime = ? WHERE path = ?", time.Now(), p); err != nil {
						Log.Warning("search::discovery insertion_failed (%v)", err)
						return false
					}
					return true
				}(p)
			}
			if performPush == true {
				heap.Push(&this.FoldersUnknown, &Document{
					Type: "directory",
					Name: name,
					Path: p,
					Size: f.Size(),
					ModTime: f.ModTime(),
					Hash: hashFiles,
				})
			}
		} else {
			_, err = tx.Exec(
				"INSERT INTO file(path, filename, filetype, type, size, modTime) VALUES(?, ?, ?, ?, ?, ?)",
				filepath.Join(doc.Path, name),
				name,
				strings.TrimPrefix(filepath.Ext(name), "."),
				"file",
				f.Size(),
				f.ModTime(),
			)
		}
	}
	err = tx.Commit()
	if err != nil {
		Log.Warning("search::discovery transaction_error (%v)", err)
		return false
	}
	return true
}

func(this *SearchIndexer) Indexing() bool {
	return false
}

func(this SearchIndexer) Bookkeeping() bool {
	return false
}

func(this SearchIndexer) Consolidate() bool {
	return false
}

type Document struct {
	Hash        string    `json:"-"`
	Type        string    `json:"type"`
	Name        string    `json:"name"`
	Path        string    `json:"path"`
	InitialPath string    `json:"-"`
	Ext         string    `json:"ext"`
	ModTime     time.Time `json:"time"`
	Size        int64     `json:"size"`
	Content     []byte    `json:"content"`
}

// https://golang.org/pkg/container/heap/
type HeapDoc []*Document
func(h HeapDoc) Len() int { return len(h) }
func(h HeapDoc) Less(i, j int) bool {
	scoreA := len(strings.Split(h[i].Path, "/")) / len(strings.Split(h[i].InitialPath, "/"))
	scoreB := len(strings.Split(h[j].Path, "/")) / len(strings.Split(h[j].InitialPath, "/"))
	return scoreA < scoreB
}
func(h HeapDoc) Swap(i, j int) {
	a := h[i]
	h[i] = h[j]
	h[j] = a
}
func (h *HeapDoc) Push(x interface{}) { *h = append(*h, x.(*Document)) }
func (h *HeapDoc) Pop() interface{} {
	old := *h
	n := len(old)
	if n == 0 {
		return nil
	}
	x := old[n-1]
	*h = old[0 : n-1]
	return x
}
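Two query tricks in `Search` above are easy to miss. First, the keyword is preprocessed so `.` and `-` are wrapped in double quotes, which stops FTS5 from parsing them as query syntax. Second, results are scoped to a folder with a plain lexicographic range: since `~` (0x7E) sorts after the characters normally found in paths, `path >= p AND path < p + "~"` matches everything under `p`. A standalone sketch of both (my illustration, not part of the commit):

package main

import (
	"fmt"
	"regexp"
)

func main() {
	// 1. escape '.' and '-' so FTS5 treats them as literal phrase parts
	keyword := "annual-report.pdf"
	escaped := regexp.MustCompile(`(\.|\-)`).ReplaceAllString(keyword, "\"$1\"")
	fmt.Println(escaped) // annual"-"report"."pdf

	// 2. folder scoping by lexicographic range instead of LIKE
	p := "/documents/"
	fmt.Printf("WHERE path >= %q AND path < %q\n", p, p+"~")
	// matches "/documents/a.txt", "/documents/sub/b.txt", ...
}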