From d1caf858838a9d6bb2f110c59050e94e458db361 Mon Sep 17 00:00:00 2001
From: David Lynch <kemayo@gmail.com>
Date: Sat, 1 May 2021 16:35:49 -0500
Subject: [PATCH] Extract tags when present

Currently supported on XenForo and AO3
---
 sites/__init__.py | 1 +
 sites/ao3.py      | 3 ++-
 sites/xenforo.py  | 4 +++-
 sites/xenforo2.py | 4 +++-
 4 files changed, 9 insertions(+), 3 deletions(-)

diff --git a/sites/__init__.py b/sites/__init__.py
index ef468bc..67c577b 100644
--- a/sites/__init__.py
+++ b/sites/__init__.py
@@ -36,6 +36,7 @@ class Section:
     id = attr.ib(default=attr.Factory(_default_uuid_string, takes_self=True), converter=str)
     contents = attr.ib(default=attr.Factory(list))
     footnotes = attr.ib(default=attr.Factory(list))
+    tags = attr.ib(default=attr.Factory(list))
     summary = attr.ib(default='')
 
     def __iter__(self):
diff --git a/sites/ao3.py b/sites/ao3.py
index 376b4d8..dd182c7 100644
--- a/sites/ao3.py
+++ b/sites/ao3.py
@@ -59,7 +59,8 @@ class ArchiveOfOurOwn(Site):
             title=soup.select('#workskin > .preface .title')[0].text.strip(),
             author=soup.select('#workskin .preface .byline a')[0].text.strip(),
             summary=soup.select('#workskin .preface .summary blockquote')[0].prettify(),
-            url=f'http://archiveofourown.org/works/{workid}'
+            url=f'http://archiveofourown.org/works/{workid}',
+            tags=[tag.get_text().strip() for tag in soup.select('.work.meta .tags a.tag')]
         )
 
         # Fetch the chapter list as well because it contains info that's not in the full work
diff --git a/sites/xenforo.py b/sites/xenforo.py
index 9facffc..4c8b50b 100644
--- a/sites/xenforo.py
+++ b/sites/xenforo.py
@@ -133,10 +133,12 @@ class XenForo(Site):
         # clean out informational bits from the title
         for tag in title.find_all(class_='prefix'):
             tag.decompose()
+        tags = [tag.get_text().strip() for tag in soup.select('div.tagBlock a.tag')]
         return Section(
             title=title.get_text().strip(),
             author=soup.find('p', id='pageDescription').find('a', class_='username').get_text(),
-            url=url
+            url=url,
+            tags=tags
         )
 
     def _posts_from_page(self, soup, postid=False):
diff --git a/sites/xenforo2.py b/sites/xenforo2.py
index ec08f12..bc9398c 100644
--- a/sites/xenforo2.py
+++ b/sites/xenforo2.py
@@ -16,10 +16,12 @@ class XenForo2(XenForo):
         # clean out informational bits from the title
         for tag in title.select('.labelLink,.label-append'):
             tag.decompose()
+        tags = [tag.get_text().strip() for tag in soup.select('.tagList a.tagItem')]
         return Section(
             title=title.get_text().strip(),
             author=soup.find('div', class_='p-description').find('a', class_='username').get_text(),
-            url=url
+            url=url,
+            tags=tags
         )
 
     def _posts_from_page(self, soup, postid=False):