1
0
Fork 0
mirror of https://github.com/kemayo/leech synced 2025-12-13 11:53:43 +01:00

Extract tags when present

Supported currently on Xenforo and AO3
This commit is contained in:
David Lynch 2021-05-01 16:35:49 -05:00
parent 37cb0332b7
commit d1caf85883
4 changed files with 9 additions and 3 deletions

View file

@@ -36,6 +36,7 @@ class Section:
id = attr.ib(default=attr.Factory(_default_uuid_string, takes_self=True), converter=str)
contents = attr.ib(default=attr.Factory(list))
footnotes = attr.ib(default=attr.Factory(list))
tags = attr.ib(default=attr.Factory(list))
summary = attr.ib(default='')
def __iter__(self):

View file

@@ -59,7 +59,8 @@ class ArchiveOfOurOwn(Site):
title=soup.select('#workskin > .preface .title')[0].text.strip(),
author=soup.select('#workskin .preface .byline a')[0].text.strip(),
summary=soup.select('#workskin .preface .summary blockquote')[0].prettify(),
url=f'http://archiveofourown.org/works/{workid}'
url=f'http://archiveofourown.org/works/{workid}',
tags=[tag.get_text().strip() for tag in soup.select('.work.meta .tags a.tag')]
)
# Fetch the chapter list as well because it contains info that's not in the full work

View file

@@ -133,10 +133,12 @@ class XenForo(Site):
# clean out informational bits from the title
for tag in title.find_all(class_='prefix'):
tag.decompose()
tags = [tag.get_text().strip() for tag in soup.select('div.tagBlock a.tag')]
return Section(
title=title.get_text().strip(),
author=soup.find('p', id='pageDescription').find('a', class_='username').get_text(),
url=url
url=url,
tags=tags
)
def _posts_from_page(self, soup, postid=False):

View file

@@ -16,10 +16,12 @@ class XenForo2(XenForo):
# clean out informational bits from the title
for tag in title.select('.labelLink,.label-append'):
tag.decompose()
tags = [tag.get_text().strip() for tag in soup.select('.tagList a.tagItem')]
return Section(
title=title.get_text().strip(),
author=soup.find('div', class_='p-description').find('a', class_='username').get_text(),
url=url
url=url,
tags=tags
)
def _posts_from_page(self, soup, postid=False):