diff --git a/calibre-plugin/plugin-defaults.ini b/calibre-plugin/plugin-defaults.ini
index fb2fe98b..33070893 100644
--- a/calibre-plugin/plugin-defaults.ini
+++ b/calibre-plugin/plugin-defaults.ini
@@ -3525,9 +3525,6 @@ add_to_titlepage_entries:,views, averageWords, fandoms
## parameter to your personal.ini and list the ones you don't want.
#exclude_notes:authornotes,newsboxes,spoilers,footnotes
-[www.silmarillionwritersguild.org]
-use_basic_cache:true
-
[www.siye.co.uk]
use_basic_cache:true
## Site dedicated to these categories/characters/ships
diff --git a/fanficfare/adapters/__init__.py b/fanficfare/adapters/__init__.py
index a126124c..14d91521 100644
--- a/fanficfare/adapters/__init__.py
+++ b/fanficfare/adapters/__init__.py
@@ -130,7 +130,6 @@ from . import adapter_swiorgru
from . import adapter_fanficsme
from . import adapter_fanfictalkcom
from . import adapter_scifistoriescom
-from . import adapter_silmarillionwritersguildorg
from . import adapter_chireadscom
from . import adapter_scribblehubcom
from . import adapter_fictionlive
diff --git a/fanficfare/adapters/adapter_silmarillionwritersguildorg.py b/fanficfare/adapters/adapter_silmarillionwritersguildorg.py
deleted file mode 100644
index b00e5bbe..00000000
--- a/fanficfare/adapters/adapter_silmarillionwritersguildorg.py
+++ /dev/null
@@ -1,272 +0,0 @@
-# -*- coding: utf-8 -*-
-
-# Copyright 2020 FanFicFare team
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-#
-
-from __future__ import absolute_import
-import logging
-logger = logging.getLogger(__name__)
-import re
-from bs4.element import Tag
-from .. import exceptions as exceptions
-
-# py2 vs py3 transition
-from ..six import text_type as unicode
-
-from .base_adapter import BaseSiteAdapter, makeDate
-
-def getClass():
- return SilmarillionWritersGuildOrgAdapter
-
-# Class name has to be unique. Our convention is camel case the
-# sitename with Adapter at the end. www is skipped.
-class SilmarillionWritersGuildOrgAdapter(BaseSiteAdapter):
-
- def __init__(self, config, url):
- BaseSiteAdapter.__init__(self, config, url)
-
- self.username = "NoneGiven" # if left empty, site doesn't return any message at all.
- self.password = ""
- self.is_adult=False
-
- # get storyId from url--url validation guarantees query is only sid=1234
- self.story.setMetadata('storyId',self.parsedUrl.query.split('=',)[1])
-
- # normalized story URL.
- self._setURL('http://' + self.getSiteDomain() + '/archive/home/viewstory.php?sid='+self.story.getMetadata('storyId'))
-
- # Each adapter needs to have a unique site abbreviation.
- self.story.setMetadata('siteabbrev','swg')
-
- # The date format will vary from site to site.
- # http://docs.python.org/library/datetime.html#strftime-strptime-behavior
- self.dateformat = "%B %d, %Y"
-
- @staticmethod # must be @staticmethod, don't remove it.
- def getSiteDomain():
- # The site domain. Does have www here, if it uses it.
- return 'www.silmarillionwritersguild.org'
-
- @classmethod
- def getSiteExampleURLs(cls):
- return "https://"+cls.getSiteDomain()+"/archive/home/viewstory.php?sid=123"
-
- def getSiteURLPattern(self):
- return r"https?://"+re.escape(self.getSiteDomain()+"/archive/home/viewstory.php?sid=")+r"\d+$"
-
- ## Getting the chapter list and the meta data
- def extractChapterUrlsAndMetadata(self):
-
- url = self.url
- logger.debug("URL: "+url)
-
- data = self.get_request(url)
-
- soup = self.make_soup(data)
-
-
- ## Title and author
-
- # find story header
- a = soup.find('h6')
-
- titleLinks = a.find_all('a')
- authorLink= titleLinks[1]
-
- self.story.setMetadata('authorId',authorLink['href'].split('=')[1])
- self.story.setMetadata('authorUrl','https://'+self.host+'/archive/home/'+authorLink['href'])
- self.story.setMetadata('author',authorLink.string)
-
- self.story.setMetadata('title',a.find('strong').find('a').get_text())
-
- # Site does some weird stuff with pagination on series view and will only display 25 stories per page of results
- # Therefore, to get an accurate index for the series, we fetch all sub-pages of the series, parse for valid story urls and add them to a list,
- # Then find the first instance of the current story url and use the number of the loop iteration for the index
-
- # This is pretty slow but ehh it works
-
- try:
- # Find Series name from series URL.
- a = soup.find('a', href=re.compile(r"viewseries.php\?seriesid=\d+"))
- if a:
- seriesName = a.string
- seriesUrl = 'https://'+self.host+'/archive/home/'+a['href']
-
- self.story.setMetadata('seriesUrl',seriesUrl)
-
- #logger.debug("Series Url: "+seriesUrl)
-
- # Get Series page and convert to soup
- seriesPageSoup = self.make_soup(self.get_request(seriesUrl+"&offset=0"))
- ## &offset=0 is the same as the first page, by adding
- ## that, the page cache will save us from fetching it
- ## twice in the loop below.
-
- # Find Series page sub-pages
- seriesPageUrlList = []
- seriesStoryList = []
- for i in seriesPageSoup.findAll('a', href=re.compile(r"viewseries.php\?seriesid=\d+&offset=\d+$")):
- # Don't include the url from the [Next] button; it would mean another http request and parse + could cause more bugs!
- if i.string != '[Next]':
- seriesPageUrlList.append(i)
-
- #get urls from all subpages and append to list
- i=1
- for seriesPagePageUrl in seriesPageUrlList:
- seriesPagePageSoup = self.make_soup(self.get_request('https://'+self.host+'/archive/home/'+seriesPagePageUrl['href']))
- storyHeaders = seriesPagePageSoup.findAll('h5')
- ## can't just search for story URLs, some story
- ## descs also contain story URLs. Looks like only
- ## story titles are <h5>.
- for storyHeader in storyHeaders:
- seriesPagePageStoryUrl = storyHeader.find('a',href=re.compile(r'^viewstory.php\?sid=\d+$'))
- if seriesPagePageStoryUrl['href'] == ('viewstory.php?sid='+self.story.getMetadata('storyId')):
- #logger.debug("Series Name: "+ seriesName)
- #logger.debug("Series Index: "+i)
- self.setSeries(seriesName, i)
- raise StopIteration("Break out of series parsing loops")
- i+=1
-
- except StopIteration:
- # break out of both loops, don't need to fetch further
- # pages after story found.
- pass
- except Exception as e:
- logger.warning("series parsing failed(%s)"%e)
-
- # Find the chapters by regexing urls
- chapters=soup.findAll('a', href=re.compile(r'viewstory.php\?sid='+self.story.getMetadata('storyId')+r"&chapter=\d+$"))
-
- if len(chapters)==1:
- self.add_chapter(self.story.getMetadata('title'),'https://'+self.host+'/archive/home/'+chapters[0]['href'])
- else:
- for chapter in chapters:
- # logger.debug("Added Chapter: "+chapter.string)
- self.add_chapter(chapter,'https://'+self.host+'/archive/home/'+chapter['href'])
-
- # find the details section for the work, will hopefully make parsing metadata a bit easier
-
- workDetails = soup.find('div', {'id' : 'general'}).find('div', {'id' : 'general'})
-
- # some metadata can be retrieved through regexes so will do that to try and avoid a janky mess.
-
- #get characters
- try:
- charList = workDetails.findAll('a', href=re.compile(r'browse.php\?type=characters'+r"&charid=\d+$"))
- for char in charList:
- self.story.addToList('characters',char.string)
-
- except Exception as e:
- logger.warning("character parsing failed(%s)"%e)
-
- #get warnings
- try:
- warnList = workDetails.findAll('a', href=re.compile(r'browse.php\?type=class&type_id=2'+r"&classid=\d+$"))
- for warn in warnList:
- self.story.addToList('warnings', warn.string)
-
- except Exception as e:
- logger.warning("warning parsing failed(%s)"%e)
-
- #get genres
- try:
- genresList = workDetails.findAll('a', href=re.compile(r'browse.php\?type=class&type_id=1'+r"&classid=\d+$"))
- for genre in genresList:
- self.story.addToList('genre', genre.string)
-
- except Exception as e:
- logger.warning("genre parsing failed(%s)"%e)
-
- # no convenient way to extract remaining metadata so bodge it by finding relevant identifier string and using next element as the data source
-
- #get summary by finding identifier, then iterating until next identifier is found and using data between the two as the summary
- try:
- summaryStart = workDetails.find('strong',text='Summary: ')
- currentElement = summaryStart.parent.next_sibling
- summaryValue = ""
- while not isinstance(currentElement,Tag) or currentElement.name != 'strong':
- summaryValue += unicode(currentElement)
- currentElement = currentElement.next_sibling
- #logger.debug(summaryValue)
- self.setDescription(url,summaryValue)
- except Exception as e:
- logger.warning("summary parsing failed(%s) -- This can be caused by bad HTML in story description."%e)
-
-
- #get rating
- try:
- rating = workDetails.find('strong',text='Rated:').next_sibling.string
- self.story.setMetadata('rating', rating)
- except Exception as e:
- logger.warning("rating parsing failed(%s) -- This can be caused by bad HTML in story description."%e)
-
- #get completion status and correct for consistency with other adapters
- try:
- if (workDetails.find('strong',text='Completed:').next_sibling.string).lower() == "yes":
- status="Completed"
-
- else:
- status="In-Progress"
-
- self.story.setMetadata('status', status)
- except Exception as e:
- logger.warning("status parsing failed(%s) -- This can be caused by bad HTML in story description."%e)
-
- #get wordcount
- try:
- wordCount = workDetails.find('strong',text='Word count:').next_sibling.string
- self.story.setMetadata('numWords', wordCount)
- except Exception as e:
- logger.warning("wordcount parsing failed(%s) -- This can be caused by bad HTML in story description."%e)
-
- #get published date; the text match only works with the surrounding spaces included
- try:
- datePublished = workDetails.find('strong',text=' Published: ').next_sibling.string
- self.story.setMetadata('datePublished', makeDate(datePublished, self.dateformat))
-
- except Exception as e:
- logger.warning("datePublished parsing failed(%s) -- This can be caused by bad HTML in story description."%e)
-
- #get updated date
- try:
- dateUpdated = workDetails.find('strong',text='Updated:').next_sibling.string
- self.story.setMetadata('dateUpdated', makeDate(dateUpdated, self.dateformat))
-
- except Exception as e:
- logger.warning("dateUpdated parsing failed(%s) -- This can be caused by bad HTML in story description."%e)
-
- # grab the text for an individual chapter.
- def getChapterText(self, url):
-
- logger.debug('Getting chapter text from: %s' % url)
-
- data = self.get_request(url)
- soup = self.make_soup(data)
-
- # No convenient way to get story without the rest of the page, so get whole page and strip unneeded sections
-
- contentParent = soup.find('div', {'id' : 'maincontent'}).find('div', {'id' : 'general'})
-
- contentParent.find('p').decompose() # remove page header
- contentParent.find_all('div',id='general')[2].decompose() #remove page footer
- contentParent.find_all('div',id='general')[0].decompose() #remove chapter select etc.
-
- contentParent.name='div'
-
- #error on failure
- if None == contentParent:
- raise exceptions.FailedToDownload("Error downloading Chapter: %s! Missing required element!" % url)
-
- return self.utf8FromSoup(url,contentParent)
diff --git a/fanficfare/defaults.ini b/fanficfare/defaults.ini
index 14728c5a..ff1dc2f2 100644
--- a/fanficfare/defaults.ini
+++ b/fanficfare/defaults.ini
@@ -3495,9 +3495,6 @@ add_to_titlepage_entries:,views, averageWords, fandoms
## parameter to your personal.ini and list the ones you don't want.
#exclude_notes:authornotes,newsboxes,spoilers,footnotes
-[www.silmarillionwritersguild.org]
-use_basic_cache:true
-
[www.siye.co.uk]
use_basic_cache:true
## Site dedicated to these categories/characters/ships