mirror of
https://github.com/kemayo/leech
synced 2025-12-13 11:53:43 +01:00
Extract tags when present
Currently supported on XenForo and AO3
This commit is contained in:
parent
37cb0332b7
commit
d1caf85883
4 changed files with 9 additions and 3 deletions
|
|
@@ -36,6 +36,7 @@ class Section:
|
|||
id = attr.ib(default=attr.Factory(_default_uuid_string, takes_self=True), converter=str)
|
||||
contents = attr.ib(default=attr.Factory(list))
|
||||
footnotes = attr.ib(default=attr.Factory(list))
|
||||
tags = attr.ib(default=attr.Factory(list))
|
||||
summary = attr.ib(default='')
|
||||
|
||||
def __iter__(self):
|
||||
|
|
|
|||
|
|
@@ -59,7 +59,8 @@ class ArchiveOfOurOwn(Site):
|
|||
title=soup.select('#workskin > .preface .title')[0].text.strip(),
|
||||
author=soup.select('#workskin .preface .byline a')[0].text.strip(),
|
||||
summary=soup.select('#workskin .preface .summary blockquote')[0].prettify(),
|
||||
url=f'http://archiveofourown.org/works/{workid}'
|
||||
url=f'http://archiveofourown.org/works/{workid}',
|
||||
tags=[tag.get_text().strip() for tag in soup.select('.work.meta .tags a.tag')]
|
||||
)
|
||||
|
||||
# Fetch the chapter list as well because it contains info that's not in the full work
|
||||
|
|
|
|||
|
|
@@ -133,10 +133,12 @@ class XenForo(Site):
|
|||
# clean out informational bits from the title
|
||||
for tag in title.find_all(class_='prefix'):
|
||||
tag.decompose()
|
||||
tags = [tag.get_text().strip() for tag in soup.select('div.tagBlock a.tag')]
|
||||
return Section(
|
||||
title=title.get_text().strip(),
|
||||
author=soup.find('p', id='pageDescription').find('a', class_='username').get_text(),
|
||||
url=url
|
||||
url=url,
|
||||
tags=tags
|
||||
)
|
||||
|
||||
def _posts_from_page(self, soup, postid=False):
|
||||
|
|
|
|||
|
|
@@ -16,10 +16,12 @@ class XenForo2(XenForo):
|
|||
# clean out informational bits from the title
|
||||
for tag in title.select('.labelLink,.label-append'):
|
||||
tag.decompose()
|
||||
tags = [tag.get_text().strip() for tag in soup.select('.tagList a.tagItem')]
|
||||
return Section(
|
||||
title=title.get_text().strip(),
|
||||
author=soup.find('div', class_='p-description').find('a', class_='username').get_text(),
|
||||
url=url
|
||||
url=url,
|
||||
tags=tags
|
||||
)
|
||||
|
||||
def _posts_from_page(self, soup, postid=False):
|
||||
|
|
|
|||
Loading…
Reference in a new issue