Test database generator (#1513)

This commit is contained in:
WithoutPants 2021-06-23 08:29:10 +10:00 committed by GitHub
parent be2fe1de26
commit 5ecea3f69f
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
9 changed files with 67426 additions and 0 deletions

View file

@ -0,0 +1,10 @@
This is a quick-and-dirty Go script for generating a contrived database for testing purposes.
Edit the `config.yml` file to your liking. The numbers indicate how many objects of each type to generate; the `naming` section lists the files from which names are generated.
May cause unexpected behaviour if run against an existing database file.
To run, from the `test_db_generator` directory:
`go run .`
The database file will be generated in the current directory.

View file

@ -0,0 +1,18 @@
database: generated.sqlite
scenes: 30000
images: 150000
galleries: 1500
markers: 300
performers: 10000
studios: 500
tags: 1500
naming:
scenes: scene.txt
performers:
male: male.txt
female: female.txt
surname: surname.txt
galleries: scene.txt
studios: studio.txt
tags: scene.txt
images: scene.txt

File diff suppressed because it is too large Load diff

View file

@ -0,0 +1,492 @@
// uild ignore
package main
import (
"context"
"database/sql"
"fmt"
"math/rand"
"os"
"strconv"
"time"
"github.com/stashapp/stash/pkg/database"
"github.com/stashapp/stash/pkg/logger"
"github.com/stashapp/stash/pkg/models"
"github.com/stashapp/stash/pkg/sqlite"
"github.com/stashapp/stash/pkg/utils"
"gopkg.in/yaml.v2"
)
// batchSize is the maximum number of scenes, images or galleries that are
// created inside a single transaction.
const batchSize = 1000
// This program creates an example database by generating a number of scenes,
// images, galleries, performers, studios and tags, and associating them with
// one another.
//
// config holds the generator settings decoded from config.yml. The integer
// fields are the number of objects of each kind to generate.
type config struct {
// Database is the path handed to database.Initialize.
Database string `yaml:"database"`
Scenes int `yaml:"scenes"`
// Markers is decoded from the config file, but populateDB does not
// generate markers.
Markers int `yaml:"markers"`
Images int `yaml:"images"`
Galleries int `yaml:"galleries"`
Performers int `yaml:"performers"`
Studios int `yaml:"studios"`
Tags int `yaml:"tags"`
// Naming selects the word-list files used to build object names.
Naming namingConfig `yaml:"naming"`
}
// txnManager is created lazily by withTxn on first use.
var txnManager models.TransactionManager
// c is the loaded generator configuration; it is set once in main.
var c *config
// main loads config.yml, loads the naming word lists, opens the database
// named in the config and then fills it with generated data. Any failure to
// load the config aborts the program with a panic.
func main() {
	cfg, err := loadConfig()
	c = cfg
	if err != nil {
		panic(err)
	}

	initNaming(*c)
	database.Initialize(c.Database)

	populateDB()
}
// loadConfig reads and strictly decodes config.yml from the current working
// directory.
//
// It returns an error if the file cannot be opened or if decoding fails.
// Strict mode is enabled on the decoder, so unknown keys are expected to be
// rejected rather than silently ignored.
func loadConfig() (*config, error) {
	const configFile = "config.yml"

	file, err := os.Open(configFile)
	if err != nil {
		// os.Open already includes the path in its error.
		return nil, err
	}
	defer file.Close()

	parser := yaml.NewDecoder(file)
	parser.SetStrict(true)

	// Decode into the struct itself rather than into a pointer-to-pointer:
	// with the latter a YAML null document can reset the pointer to nil and
	// the caller would receive a nil config together with a nil error.
	ret := &config{}
	if err := parser.Decode(ret); err != nil {
		return nil, fmt.Errorf("parsing %s: %w", configFile, err)
	}

	return ret, nil
}
// populateDB runs every generator in a fixed order: tags, studios and
// performers first, then scenes, images and galleries. The later generators
// pick tag, studio, performer and image IDs at random from the configured
// counts, so the order must not change.
func populateDB() {
	steps := []struct {
		generate func(n int)
		count    int
	}{
		{makeTags, c.Tags},
		{makeStudios, c.Studios},
		{makePerformers, c.Performers},
		{makeScenes, c.Scenes},
		{makeImages, c.Images},
		{makeGalleries, c.Galleries},
	}

	for _, step := range steps {
		step.generate(step.count)
	}
}
// withTxn runs f through the package-level transaction manager, creating
// that manager on the first call. It returns whatever the manager's WithTxn
// returns.
func withTxn(f func(r models.Repository) error) error {
	mgr := txnManager
	if mgr == nil {
		mgr = sqlite.NewTransactionManager()
		txnManager = mgr
	}

	return mgr.WithTxn(context.TODO(), f)
}
// retry calls fn until it succeeds or it has been called attempts times.
//
// It returns nil as soon as fn returns nil, otherwise the error from the last
// call. If attempts is zero or negative, fn is never called and nil is
// returned.
func retry(attempts int, fn func() error) error {
	var lastErr error

	for remaining := attempts; remaining > 0; remaining-- {
		if lastErr = fn(); lastErr == nil {
			break
		}
	}

	return lastErr
}
func makeTags(n int) {
for i := 0; i < n; i++ {
if err := retry(100, func() error {
return withTxn(func(r models.Repository) error {
name := names[c.Naming.Tags].generateName(1)
tag := models.Tag{
Name: name,
}
_, err := r.Tag().Create(tag)
return err
})
}); err != nil {
panic(err)
}
}
}
// makeStudios creates n studios, each in its own transaction.
//
// Names are one to five words drawn from the studios word list
// (c.Naming.Studios). When rand.Intn(100) > 5, i.e. for roughly 94% of
// studios, a random existing studio is queried and, if one is found, used as
// the parent.
//
// Each creation is attempted up to 100 times; the function panics if a
// studio still cannot be created.
func makeStudios(n int) {
	for i := 0; i < n; i++ {
		if err := retry(100, func() error {
			return withTxn(func(r models.Repository) error {
				// Use the studios word list; the tags list was used here
				// by mistake, leaving the configured studios file unused.
				name := names[c.Naming.Studios].generateName(rand.Intn(5) + 1)
				studio := models.Studio{
					Name:     sql.NullString{String: name, Valid: true},
					Checksum: utils.MD5FromString(name),
				}

				if rand.Intn(100) > 5 {
					ss, _, err := r.Studio().Query(nil, getRandomFilter(1))
					if err != nil {
						return err
					}

					// No studios exist yet on the first iteration, so the
					// query may legitimately return nothing.
					if len(ss) > 0 {
						studio.ParentID = sql.NullInt64{
							Int64: int64(ss[0].ID),
							Valid: true,
						}
					}
				}

				_, err := r.Studio().Create(studio)
				return err
			})
		}); err != nil {
			panic(err)
		}
	}
}
// makePerformers creates n performers, each in its own transaction, with a
// name from generatePerformerName and Favorite set to false.
//
// Each creation is attempted up to 100 times; the function panics with the
// last error if a performer still cannot be created.
func makePerformers(n int) {
	for i := 0; i < n; i++ {
		if err := retry(100, func() error {
			return withTxn(func(r models.Repository) error {
				name := generatePerformerName()
				performer := models.Performer{
					Name:     sql.NullString{String: name, Valid: true},
					Checksum: utils.MD5FromString(name),
					Favorite: sql.NullBool{
						Bool:  false,
						Valid: true,
					},
				}

				// TODO - set tags

				if _, err := r.Performer().Create(performer); err != nil {
					// Wrap with %w so the underlying error stays
					// inspectable via errors.Is / errors.As.
					return fmt.Errorf("error creating performer with name: %s: %w", performer.Name.String, err)
				}
				return nil
			})
		}); err != nil {
			panic(err)
		}
	}
}
// makeScenes creates n scenes in transactions of at most batchSize scenes
// each, assigning every scene a random studio plus random tags and
// performers. The random source is re-seeded with a fixed value first.
//
// Progress is logged after each batch. The function panics if a transaction
// fails.
func makeScenes(n int) {
	logger.Infof("creating %d scenes...", n)
	rand.Seed(533)

	done := 0
	for done < n {
		// upper bound (exclusive) of the batch handled by this transaction
		limit := done + batchSize
		if limit > n {
			limit = n
		}

		err := withTxn(func(r models.Repository) error {
			for done < limit {
				scene := generateScene(done)
				scene.StudioID = getRandomStudioID(r)

				saved, err := r.Scene().Create(scene)
				if err != nil {
					return err
				}

				makeSceneRelationships(r, saved.ID)
				done++
			}
			return nil
		})
		if err != nil {
			panic(err)
		}

		logger.Infof("... created %d scenes", done)
	}
}
// getResolution returns a random (width, height) pair.
//
// The height is the maximum resolution of a randomly chosen entry of
// models.AllResolutionEnum. The width uses a 4:3 ratio for heights of 240 and
// 480 and for one in ten of the remaining cases, and 16:9 otherwise. One time
// in ten the two values are returned swapped.
func getResolution() (int64, int64) {
	resolutions := models.AllResolutionEnum
	height := int64(resolutions[rand.Intn(len(resolutions))].GetMaxResolution())

	// Start from 16:9 and narrow to 4:3 where applicable. The random draw
	// only happens when the height is neither 240 nor 480.
	width := height * 16 / 9
	if height == 240 || height == 480 || rand.Intn(10) == 9 {
		width = height * 4 / 3
	}

	if rand.Intn(10) != 9 {
		return width, height
	}

	// swapped dimensions
	return height, width
}
// getDate returns a random date between the Unix epoch and the current time,
// formatted as YYYY-MM-DD.
func getDate() string {
	now := time.Now().Unix()
	return time.Unix(rand.Int63n(now), 0).Format("2006-01-02")
}
// generateScene builds the i-th scene, without saving it.
//
// The path is the MD5 of "scene/<i>", and the checksum and oshash are both
// the MD5 of that path. The title has one to seven random words, the duration
// is a random number of seconds below 14400, and the resolution and date are
// random.
func generateScene(i int) models.Scene {
	path := utils.MD5FromString("scene/" + strconv.Itoa(i))
	pathHash := utils.MD5FromString(path)

	// The random draws below are made in a fixed order (resolution, title,
	// duration, date) so that a given seed always yields the same data.
	w, h := getResolution()
	title := names[c.Naming.Scenes].generateName(rand.Intn(7) + 1)
	duration := rand.Float64() * 14400
	date := getDate()

	var scene models.Scene
	scene.Path = path
	scene.Title = sql.NullString{String: title, Valid: true}
	scene.Checksum = sql.NullString{String: pathHash, Valid: true}
	scene.OSHash = sql.NullString{String: pathHash, Valid: true}
	scene.Duration = sql.NullFloat64{Float64: duration, Valid: true}
	scene.Height = models.NullInt64(h)
	scene.Width = models.NullInt64(w)
	scene.Date = models.SQLiteDate{String: date, Valid: true}

	return scene
}
// makeImages creates n images in transactions of at most batchSize images
// each, assigning every image a random studio plus random tags and
// performers. The random source is re-seeded with a fixed value first.
//
// Progress is logged after each batch has been committed, matching
// makeScenes and makeGalleries. The function panics if a transaction fails.
func makeImages(n int) {
	logger.Infof("creating %d images...", n)
	rand.Seed(1293)

	for i := 0; i < n; {
		// do in batches of batchSize
		batch := i + batchSize
		if err := withTxn(func(r models.Repository) error {
			for ; i < batch && i < n; i++ {
				image := generateImage(i)
				image.StudioID = getRandomStudioID(r)

				created, err := r.Image().Create(image)
				if err != nil {
					return err
				}

				makeImageRelationships(r, created.ID)
			}

			return nil
		}); err != nil {
			panic(err)
		}

		// Report only once the transaction has completed, so the message
		// never counts images from a batch that later fails.
		logger.Infof("... created %d images", i)
	}
}
// generateImage builds the i-th image, without saving it.
//
// The path is the MD5 of "image/<i>" and the checksum is the MD5 of that
// path. The title has one to seven random words and the resolution is random.
func generateImage(i int) models.Image {
	path := utils.MD5FromString("image/" + strconv.Itoa(i))

	// Draw the resolution before the title to keep the random sequence
	// stable for a given seed.
	w, h := getResolution()
	title := names[c.Naming.Images].generateName(rand.Intn(7) + 1)

	var image models.Image
	image.Title = sql.NullString{String: title, Valid: true}
	image.Path = path
	image.Checksum = utils.MD5FromString(path)
	image.Height = models.NullInt64(h)
	image.Width = models.NullInt64(w)

	return image
}
// makeGalleries creates n galleries in transactions of at most batchSize
// galleries each, assigning every gallery a random studio plus random tags,
// performers and images. The random source is re-seeded with a fixed value
// first.
//
// Progress is logged after each batch. The function panics if a transaction
// fails.
func makeGalleries(n int) {
	logger.Infof("creating %d galleries...", n)
	rand.Seed(92113)

	done := 0
	for done < n {
		// upper bound (exclusive) of the batch handled by this transaction
		limit := done + batchSize
		if limit > n {
			limit = n
		}

		err := withTxn(func(r models.Repository) error {
			for done < limit {
				gallery := generateGallery(done)
				gallery.StudioID = getRandomStudioID(r)

				saved, err := r.Gallery().Create(gallery)
				if err != nil {
					return err
				}

				makeGalleryRelationships(r, saved.ID)
				done++
			}
			return nil
		})
		if err != nil {
			panic(err)
		}

		logger.Infof("... created %d galleries", done)
	}
}
// generateGallery builds the i-th gallery, without saving it.
//
// The path is the MD5 of "gallery/<i>" and the checksum is the MD5 of that
// path. The title has one to seven random words and the date is random.
func generateGallery(i int) models.Gallery {
	path := utils.MD5FromString("gallery/" + strconv.Itoa(i))

	// Title is drawn before the date to keep the random sequence stable.
	title := names[c.Naming.Galleries].generateName(rand.Intn(7) + 1)
	checksum := utils.MD5FromString(path)
	date := getDate()

	var gallery models.Gallery
	gallery.Title = sql.NullString{String: title, Valid: true}
	gallery.Path = sql.NullString{String: path, Valid: true}
	gallery.Checksum = checksum
	gallery.Date = models.SQLiteDate{String: date, Valid: true}

	return gallery
}
// getRandomFilter returns a find filter that sorts by "random" and limits
// the page size to n.
func getRandomFilter(n int) *models.FindFilterType {
	order := "random"

	filter := new(models.FindFilterType)
	filter.Sort = &order
	filter.PerPage = &n

	return filter
}
// getRandomStudioID returns a random studio ID, or a NULL value one time in
// ten.
//
// The ID is picked from 1..c.Studios rather than queried from the database,
// which assumes studio IDs were assigned sequentially starting at 1 (TODO
// confirm). r is currently unused. When no studios are configured a NULL
// value is always returned instead of panicking in rand.Int63n.
func getRandomStudioID(r models.Repository) sql.NullInt64 {
	if c.Studios <= 0 || rand.Intn(10) == 0 {
		return sql.NullInt64{}
	}

	return sql.NullInt64{
		Int64: rand.Int63n(int64(c.Studios)) + 1,
		Valid: true,
	}
}
// makeSceneRelationships attaches a random set of tags and a random set of
// performers to the scene with the given id. Each update is skipped when its
// own ID list is empty. The function panics if an update fails.
func makeSceneRelationships(r models.Repository, id int) {
	// add tags
	if tagIDs := getRandomTags(r); len(tagIDs) > 0 {
		if err := r.Scene().UpdateTags(id, tagIDs); err != nil {
			panic(err)
		}
	}

	// add performers - guarded by the performer list itself, not the tag
	// list, so performers are no longer dropped when no tags were drawn
	if performerIDs := getRandomPerformers(r); len(performerIDs) > 0 {
		if err := r.Scene().UpdatePerformers(id, performerIDs); err != nil {
			panic(err)
		}
	}
}
// makeImageRelationships attaches a random set of tags and a random set of
// performers to the image with the given id. Each update is skipped when its
// own ID list is empty. The function panics if an update fails.
func makeImageRelationships(r models.Repository, id int) {
	// add tags
	if tagIDs := getRandomTags(r); len(tagIDs) > 0 {
		if err := r.Image().UpdateTags(id, tagIDs); err != nil {
			panic(err)
		}
	}

	// add performers - guarded by the performer list itself, not the tag
	// list, so performers are no longer dropped when no tags were drawn
	if performerIDs := getRandomPerformers(r); len(performerIDs) > 0 {
		if err := r.Image().UpdatePerformers(id, performerIDs); err != nil {
			panic(err)
		}
	}
}
// makeGalleryRelationships attaches random sets of tags, performers and
// images to the gallery with the given id. Each update is skipped when its
// own ID list is empty. The function panics if an update fails.
func makeGalleryRelationships(r models.Repository, id int) {
	// add tags
	if tagIDs := getRandomTags(r); len(tagIDs) > 0 {
		if err := r.Gallery().UpdateTags(id, tagIDs); err != nil {
			panic(err)
		}
	}

	// add performers - guarded by the performer list itself, not the tag
	// list
	if performerIDs := getRandomPerformers(r); len(performerIDs) > 0 {
		if err := r.Gallery().UpdatePerformers(id, performerIDs); err != nil {
			panic(err)
		}
	}

	// add images - guarded by the image list itself, not the tag list
	if imageIDs := getRandomImages(r); len(imageIDs) > 0 {
		if err := r.Gallery().UpdateImages(id, imageIDs); err != nil {
			panic(err)
		}
	}
}
// getRandomPerformers returns up to four distinct random performer IDs.
//
// IDs are picked from 1..c.Performers rather than queried from the database,
// which assumes performer IDs were assigned sequentially starting at 1 (TODO
// confirm). r is currently unused. Duplicate draws are discarded, so fewer
// IDs than drawn may be returned. When no performers are configured the
// result is empty instead of a panic in rand.Intn.
func getRandomPerformers(r models.Repository) []int {
	n := rand.Intn(5)

	var ret []int
	if c.Performers <= 0 {
		return ret
	}

	for i := 0; i < n; i++ {
		ret = utils.IntAppendUnique(ret, rand.Intn(c.Performers)+1)
	}

	return ret
}
// getRandomTags returns up to fourteen distinct random tag IDs.
//
// IDs are picked from 1..c.Tags rather than queried from the database, which
// assumes tag IDs were assigned sequentially starting at 1 (TODO confirm). r
// is currently unused. Duplicate draws are discarded, so fewer IDs than drawn
// may be returned. When no tags are configured the result is empty instead of
// a panic in rand.Intn.
func getRandomTags(r models.Repository) []int {
	n := rand.Intn(15)

	var ret []int
	if c.Tags <= 0 {
		return ret
	}

	for i := 0; i < n; i++ {
		ret = utils.IntAppendUnique(ret, rand.Intn(c.Tags)+1)
	}

	return ret
}
// getRandomImages returns up to 499 distinct random image IDs.
//
// IDs are picked from 1..c.Images rather than queried from the database,
// which assumes image IDs were assigned sequentially starting at 1 (TODO
// confirm). r is currently unused. Duplicate draws are discarded, so fewer
// IDs than drawn may be returned. When no images are configured the result is
// empty instead of a panic in rand.Intn.
func getRandomImages(r models.Repository) []int {
	n := rand.Intn(500)

	var ret []int
	if c.Images <= 0 {
		return ret
	}

	for i := 0; i < n; i++ {
		ret = utils.IntAppendUnique(ret, rand.Intn(c.Images)+1)
	}

	return ret
}

File diff suppressed because it is too large Load diff

View file

@ -0,0 +1,106 @@
package main
import (
"bufio"
"math/rand"
"os"
"strings"
)
// names maps a word-list file name to the naming loaded from it. It is
// populated by initNaming.
var names map[string]*naming
// performerNamingConfig lists the word-list files used by
// generatePerformerName: given names for male and female performers, and
// surnames.
type performerNamingConfig struct {
Male string `yaml:"male"`
Female string `yaml:"female"`
Surname string `yaml:"surname"`
}
// namingConfig is the `naming` section of the config file. Each string field
// is the name of a word-list file that initNaming loads via createNaming; the
// same file may be used for several object types.
type namingConfig struct {
Scenes string `yaml:"scenes"`
Performers performerNamingConfig `yaml:"performers"`
Galleries string `yaml:"galleries"`
Studios string `yaml:"studios"`
Images string `yaml:"images"`
Tags string `yaml:"tags"`
}
// naming is a list of words from which random names are assembled.
type naming struct {
	names []string
}

// emptyNamingError is returned by createNaming when a word-list file
// contains no usable names.
type emptyNamingError struct {
	file string
}

func (e emptyNamingError) Error() string {
	return "naming file " + e.file + " contains no names"
}

// generateName returns the given number of randomly chosen words joined by
// single spaces. Words may repeat. It returns an empty string when words is
// zero or negative.
//
// n must hold at least one name, otherwise rand.Intn panics; values returned
// by createNaming always satisfy this.
func (n naming) generateName(words int) string {
	if words <= 0 {
		return ""
	}

	parts := make([]string, words)
	for i := range parts {
		parts[i] = n.names[rand.Intn(len(n.names))]
	}

	return strings.Join(parts, " ")
}

// createNaming loads a word list from the file fn, one name per line.
//
// Surrounding whitespace is trimmed and blank lines are skipped so that they
// do not turn into empty words in generated names. It returns an error if the
// file cannot be opened or read, or if it contains no names at all; without
// that check generateName would panic later on the empty list.
func createNaming(fn string) (*naming, error) {
	file, err := os.Open(fn)
	if err != nil {
		return nil, err
	}
	defer file.Close()

	ret := &naming{}
	s := bufio.NewScanner(file)
	for s.Scan() {
		name := strings.TrimSpace(s.Text())
		if name == "" {
			continue
		}
		ret.names = append(ret.names, name)
	}

	if err := s.Err(); err != nil {
		return nil, err
	}

	if len(ret.names) == 0 {
		return nil, emptyNamingError{file: fn}
	}

	return ret, nil
}
// initNaming resets the package-level names map and loads every word-list
// file referenced by c.Naming into it, keyed by file name. A file referenced
// more than once is loaded only once. It panics if a file cannot be loaded.
func initNaming(c config) {
	names = make(map[string]*naming)

	cfg := c.Naming
	files := []string{
		cfg.Galleries,
		cfg.Images,
		cfg.Scenes,
		cfg.Studios,
		cfg.Tags,
		cfg.Performers.Female,
		cfg.Performers.Male,
		cfg.Performers.Surname,
	}

	for _, fn := range files {
		if names[fn] != nil {
			// already loaded for another object type
			continue
		}

		loaded, err := createNaming(fn)
		if err != nil {
			panic(err)
		}
		names[fn] = loaded
	}
}
// generatePerformerName returns a random performer name.
//
// Three times in four the given name comes from the female word list,
// otherwise from the male one. Out of 100, 3 names get two given names plus
// a surname, 23 get a single given name and no surname, and the remaining 74
// get one given name plus a surname.
func generatePerformerName() string {
	// The two draws are made in this order to keep the random sequence
	// stable for a given seed.
	male := rand.Intn(4) == 0
	roll := rand.Intn(100)

	givenCount, withSurname := 1, true
	switch {
	case roll < 3:
		givenCount = 2
	case roll < 26:
		withSurname = false
	}

	source := c.Naming.Performers.Female
	if male {
		source = c.Naming.Performers.Male
	}

	result := names[source].generateName(givenCount)
	if !withSurname {
		return result
	}

	return result + " " + names[c.Naming.Performers.Surname].generateName(1)
}

File diff suppressed because it is too large Load diff

File diff suppressed because it is too large Load diff

File diff suppressed because it is too large Load diff