mirror of
https://github.com/stashapp/stash.git
synced 2025-12-06 16:34:02 +01:00
freeones scraper fixes/tweaking (#584)
This commit is contained in:
parent
d1e6858c11
commit
b89956de25
1 changed files with 28 additions and 22 deletions
|
|
@ -24,10 +24,9 @@ performerByURL:
|
||||||
xPathScrapers:
|
xPathScrapers:
|
||||||
performerSearch:
|
performerSearch:
|
||||||
performer:
|
performer:
|
||||||
Name: //div[@id="search-result"]//a[@class=""]//div//p/text()
|
Name: //div[@id="search-result"]//p[@data-test="subject-name"]/text()
|
||||||
URL:
|
URL:
|
||||||
selector: //div[@id="search-result"]//a[@class=""]/@href
|
selector: //div[@id="search-result"]//div[@data-test="teaser-subject"]/a/@href
|
||||||
# URL is a partial url, add the first part
|
|
||||||
replace:
|
replace:
|
||||||
- regex: ^
|
- regex: ^
|
||||||
with: https://www.freeones.xxx
|
with: https://www.freeones.xxx
|
||||||
|
|
@ -39,21 +38,18 @@ xPathScrapers:
|
||||||
Name: //h1
|
Name: //h1
|
||||||
URL:
|
URL:
|
||||||
selector: //a[span[text()="Profile"]]/@href
|
selector: //a[span[text()="Profile"]]/@href
|
||||||
# URL is a partial url, add the first part
|
|
||||||
replace:
|
replace:
|
||||||
- regex: ^
|
- regex: ^
|
||||||
with: https://www.freeones.xxx
|
with: https://www.freeones.xxx
|
||||||
Twitter: //div[p[text()='Follow On']]//div//a[@class='d-flex align-items-center justify-content-center mr-2 social-icons color-twitter']/@href
|
Twitter: //div[p[text()='Follow On']]//div//a[@class='d-flex align-items-center justify-content-center mr-2 social-icons color-twitter']/@href
|
||||||
Instagram: //div[p[text()='Follow On']]//div//a[@class='d-flex align-items-center justify-content-center mr-2 social-icons color-telegram']/@href
|
Instagram: //div[p[text()='Follow On']]//div//a[@class='d-flex align-items-center justify-content-center mr-2 social-icons color-telegram']/@href
|
||||||
# need to add support for concatenating two elements or something
|
|
||||||
Birthdate:
|
Birthdate:
|
||||||
selector: //div[p[text()='Personal Information']]//div//p[1]//a
|
selector: //div[p[text()='Personal Information']]//div//p/a/span[contains(text(),'Born On')]
|
||||||
replace:
|
replace:
|
||||||
- regex: Born On
|
- regex: Born On
|
||||||
with:
|
with:
|
||||||
- regex: ","
|
- regex: ","
|
||||||
with:
|
with:
|
||||||
# reference date is: 2006/01/02
|
|
||||||
parseDate: January 2 2006
|
parseDate: January 2 2006
|
||||||
Ethnicity:
|
Ethnicity:
|
||||||
selector: //div[p[text()='Ethnicity']]//div//p[@class='mb-0 text-center']
|
selector: //div[p[text()='Ethnicity']]//div//p[@class='mb-0 text-center']
|
||||||
|
|
@ -66,16 +62,27 @@ xPathScrapers:
|
||||||
with: "black"
|
with: "black"
|
||||||
- regex: Latin
|
- regex: Latin
|
||||||
with: "hispanic"
|
with: "hispanic"
|
||||||
Country: //div[p[text()='Personal Information']]//div//p[3]//a[last()]
|
Country: //div[p[text()='Personal Information']]//div//p//a[@data-test="link-country"]
|
||||||
EyeColor: //div[p[text()='Eye Color']]//div//p//a//span
|
EyeColor: //div[p[text()='Eye Color']]//div//p//a//span
|
||||||
Height:
|
Height:
|
||||||
selector: //div[p[text()='Height']]//div//p//a//span
|
selector: //div[p[text()='Height']]//div//p//a//span
|
||||||
replace:
|
replace:
|
||||||
- regex: \D+[\s\S]+
|
- regex: \D+[\s\S]+
|
||||||
with: ""
|
with: ""
|
||||||
Measurements: //div[p[text()='Measurements']]//div[@class='p-3']//p
|
Measurements:
|
||||||
FakeTits: //div[p[text()='Fake Boobs']]//div[@class='p-3']//p
|
selector: //div[p[text()='Measurements']]//div[@class='p-3']//p
|
||||||
# nbsp; screws up the parsing, so use contains instead
|
replace:
|
||||||
|
- regex: Unknown
|
||||||
|
with:
|
||||||
|
FakeTits:
|
||||||
|
selector: //span[@data-test='link_span_boobs']
|
||||||
|
replace:
|
||||||
|
- regex: Unknown
|
||||||
|
with:
|
||||||
|
- regex: Fake
|
||||||
|
with: "Yes"
|
||||||
|
- regex: Natural
|
||||||
|
with: "No"
|
||||||
CareerLength:
|
CareerLength:
|
||||||
selector: //div[p[text()='career']]//div//div[@class='timeline-horizontal mb-3']//div//p[@class='m-0']
|
selector: //div[p[text()='career']]//div//div[@class='timeline-horizontal mb-3']//div//p[@class='m-0']
|
||||||
concat: "-"
|
concat: "-"
|
||||||
|
|
@ -87,7 +94,6 @@ xPathScrapers:
|
||||||
Piercings: //div[p[text()='Piercings']]//div//p[@class='mb-0 text-center']
|
Piercings: //div[p[text()='Piercings']]//div//p[@class='mb-0 text-center']
|
||||||
Image:
|
Image:
|
||||||
selector: //div[@class='profile-image-large']//a/img/@src
|
selector: //div[@class='profile-image-large']//a/img/@src
|
||||||
# URL is a partial url, add the first part
|
|
||||||
`
|
`
|
||||||
|
|
||||||
func GetFreeonesScraper() scraperConfig {
|
func GetFreeonesScraper() scraperConfig {
|
||||||
|
|
|
||||||
Loading…
Reference in a new issue