freeones scraper fixes/tweaking (#584)

This commit is contained in:
bnkai 2020-06-02 02:45:37 +03:00 committed by GitHub
parent d1e6858c11
commit b89956de25
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23

View file

@ -24,10 +24,9 @@ performerByURL:
xPathScrapers: xPathScrapers:
performerSearch: performerSearch:
performer: performer:
Name: //div[@id="search-result"]//a[@class=""]//div//p/text() Name: //div[@id="search-result"]//p[@data-test="subject-name"]/text()
URL: URL:
selector: //div[@id="search-result"]//a[@class=""]/@href selector: //div[@id="search-result"]//div[@data-test="teaser-subject"]/a/@href
# URL is a partial url, add the first part
replace: replace:
- regex: ^ - regex: ^
with: https://www.freeones.xxx with: https://www.freeones.xxx
@ -39,21 +38,18 @@ xPathScrapers:
Name: //h1 Name: //h1
URL: URL:
selector: //a[span[text()="Profile"]]/@href selector: //a[span[text()="Profile"]]/@href
# URL is a partial url, add the first part
replace: replace:
- regex: ^ - regex: ^
with: https://www.freeones.xxx with: https://www.freeones.xxx
Twitter: //div[p[text()='Follow On']]//div//a[@class='d-flex align-items-center justify-content-center mr-2 social-icons color-twitter']/@href Twitter: //div[p[text()='Follow On']]//div//a[@class='d-flex align-items-center justify-content-center mr-2 social-icons color-twitter']/@href
Instagram: //div[p[text()='Follow On']]//div//a[@class='d-flex align-items-center justify-content-center mr-2 social-icons color-telegram']/@href Instagram: //div[p[text()='Follow On']]//div//a[@class='d-flex align-items-center justify-content-center mr-2 social-icons color-telegram']/@href
# need to add support for concatenating two elements or something
Birthdate: Birthdate:
selector: //div[p[text()='Personal Information']]//div//p[1]//a selector: //div[p[text()='Personal Information']]//div//p/a/span[contains(text(),'Born On')]
replace: replace:
- regex: Born On - regex: Born On
with: with:
- regex: "," - regex: ","
with: with:
# reference date is: 2006/01/02
parseDate: January 2 2006 parseDate: January 2 2006
Ethnicity: Ethnicity:
selector: //div[p[text()='Ethnicity']]//div//p[@class='mb-0 text-center'] selector: //div[p[text()='Ethnicity']]//div//p[@class='mb-0 text-center']
@ -66,16 +62,27 @@ xPathScrapers:
with: "black" with: "black"
- regex: Latin - regex: Latin
with: "hispanic" with: "hispanic"
Country: //div[p[text()='Personal Information']]//div//p[3]//a[last()] Country: //div[p[text()='Personal Information']]//div//p//a[@data-test="link-country"]
EyeColor: //div[p[text()='Eye Color']]//div//p//a//span EyeColor: //div[p[text()='Eye Color']]//div//p//a//span
Height: Height:
selector: //div[p[text()='Height']]//div//p//a//span selector: //div[p[text()='Height']]//div//p//a//span
replace: replace:
- regex: \D+[\s\S]+ - regex: \D+[\s\S]+
with: "" with: ""
Measurements: //div[p[text()='Measurements']]//div[@class='p-3']//p Measurements:
FakeTits: //div[p[text()='Fake Boobs']]//div[@class='p-3']//p selector: //div[p[text()='Measurements']]//div[@class='p-3']//p
# nbsp; screws up the parsing, so use contains instead replace:
- regex: Unknown
with:
FakeTits:
selector: //span[@data-test='link_span_boobs']
replace:
- regex: Unknown
with:
- regex: Fake
with: "Yes"
- regex: Natural
with: "No"
CareerLength: CareerLength:
selector: //div[p[text()='career']]//div//div[@class='timeline-horizontal mb-3']//div//p[@class='m-0'] selector: //div[p[text()='career']]//div//div[@class='timeline-horizontal mb-3']//div//p[@class='m-0']
concat: "-" concat: "-"
@ -87,7 +94,6 @@ xPathScrapers:
Piercings: //div[p[text()='Piercings']]//div//p[@class='mb-0 text-center'] Piercings: //div[p[text()='Piercings']]//div//p[@class='mb-0 text-center']
Image: Image:
selector: //div[@class='profile-image-large']//a/img/@src selector: //div[@class='profile-image-large']//a/img/@src
# URL is a partial url, add the first part
` `
func GetFreeonesScraper() scraperConfig { func GetFreeonesScraper() scraperConfig {