From f9471377bbfaacac2201430d1441b16b7d5ea30d Mon Sep 17 00:00:00 2001 From: Jim Miller Date: Wed, 29 Mar 2023 17:56:12 -0500 Subject: [PATCH] Remove site: sugarquill.net - Site retired --- calibre-plugin/plugin-defaults.ini | 3 - fanficfare/adapters/__init__.py | 1 - fanficfare/adapters/adapter_sugarquillnet.py | 190 ------------------- fanficfare/defaults.ini | 3 - 4 files changed, 197 deletions(-) delete mode 100644 fanficfare/adapters/adapter_sugarquillnet.py diff --git a/calibre-plugin/plugin-defaults.ini b/calibre-plugin/plugin-defaults.ini index e40d9972..d64c58ff 100644 --- a/calibre-plugin/plugin-defaults.ini +++ b/calibre-plugin/plugin-defaults.ini @@ -3687,9 +3687,6 @@ extracategories:Lord of the Rings website_encodings:Windows-1252,utf8 -[www.sugarquill.net] -use_basic_cache:true - [www.swi.org.ru] use_basic_cache:true diff --git a/fanficfare/adapters/__init__.py b/fanficfare/adapters/__init__.py index d47bbd30..d6e93a4a 100644 --- a/fanficfare/adapters/__init__.py +++ b/fanficfare/adapters/__init__.py @@ -113,7 +113,6 @@ from . import adapter_valentchambercom from . import adapter_looselugscom from . import adapter_wwwgiantessworldnet from . import adapter_lotrgficcom -from . import adapter_sugarquillnet from . import adapter_starslibrarynet from . import adapter_fanficauthorsnet from . import adapter_fireflyfansnet diff --git a/fanficfare/adapters/adapter_sugarquillnet.py b/fanficfare/adapters/adapter_sugarquillnet.py deleted file mode 100644 index daf9f1a3..00000000 --- a/fanficfare/adapters/adapter_sugarquillnet.py +++ /dev/null @@ -1,190 +0,0 @@ -# -*- coding: utf-8 -*- - -# Copyright 2012 Fanficdownloader team, 2018 FanFicFare team -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# -############################################################################# -### Adapted by GComyn -### Original - November 23, 2016 -### Updateed - November 24,2016 -### Fixed chapter determination. Some stories had another form in the -### first chapter, so had to change that section. -############################################################################# -### Updateed - November 25,2016 -### some of the stories have extra formatting that makes the heuristics -### take a long gime to process. I've removed as much of the extra -### formatting as I thought I could. -############################################################################# -from __future__ import absolute_import -import logging - -import re - -from bs4.element import Comment -from ..htmlcleanup import stripHTML -from .. import exceptions as exceptions - -from .base_adapter import BaseSiteAdapter, makeDate - -logger = logging.getLogger(__name__) - - -def getClass(): - return SugarQuillNetAdapter - -# Class name has to be unique. Our convention is camel case the -# sitename with Adapter at the end. www is skipped. -class SugarQuillNetAdapter(BaseSiteAdapter): - - def __init__(self, config, url): - BaseSiteAdapter.__init__(self, config, url) - - self.username = "NoneGiven" # if left empty, site doesn't return any message at all. - self.password = "" - self.is_adult=False - - self.story.setMetadata("storyId", self.parsed_QS["storyid"]) - - # normalized story URL. - self._setURL('http://' + self.getSiteDomain() + '/read.php?storyid='+self.story.getMetadata('storyId')) - - # Each adapter needs to have a unique site abbreviation. - self.story.setMetadata('siteabbrev','sq') - - # The date format will vary from site to site. - # http://docs.python.org/library/datetime.html#strftime-strptime-behavior - self.dateformat = "%m/%d/%y" - - @staticmethod # must be @staticmethod, don't remove it. - def getSiteDomain(): - # The site domain. Does have www here, if it uses it. - return 'www.sugarquill.net' - - @classmethod - def stripURLParameters(cls, url): - return url - - @classmethod - def getSiteExampleURLs(cls): - return "http://" + cls.getSiteDomain() + "/read.php?storyid=1234" - - def getSiteURLPattern(self): - return "http://" + re.escape(self.getSiteDomain()) + \ - r"/read\.php?.*storyid=\d+" - - @classmethod - def stripURLParameters(cls,url): - ## remove any parameters other than storyid. chapno=1 will be - ## added to canonical storyUrl - return re.sub(r"\?.*(storyid=\d+).*$",r"?\1",url) - - ## Getting the chapter list and the meta data - def extractChapterUrlsAndMetadata(self): - - # index=1 makes sure we see the story chapter index. Some - # sites skip that for one-chapter stories. - url = self.url+'&chapno=1' - logger.debug("URL: "+url) - - data = self.get_request(url) - - if "Invalid storyid or chapno" in data: - raise exceptions.AccessDenied(self.getSiteDomain() +" says: Invalid storyid or chapno.") - - soup = self.make_soup(data) - - - ## Title - a = soup.find('b',text='Story').nextSibling.string.strip(':').strip() - self.story.setMetadata('title',a) - - # Find authorid and URL from... author url. - a = soup.find('b',text='Author').nextSibling.nextSibling - self.story.setMetadata('authorId',a['href'].split('id=')[1]) - self.story.setMetadata('authorUrl','http://'+self.host+'/'+a['href']) - self.story.setMetadata('author',a.string.replace("(Professors' Bookshelf)",'').strip()) - - # Find the chapters: - chapters = soup.find('select',{'name':'chapno'}).findAll('option') - for chapter in chapters: - if chapter.string == 'Default': - chapter.string = 'Chapter 1' - self.add_chapter(chapter.string, '{0}&chapno={1}'.format(self.url,chapter['value'])) - - - ## This site doesn't have much metadata, so we will get what we can. - ## The metadata is all on the author's page, so we have to get it to parse. - author_Url = self.story.getMetadata('authorUrl').replace('&','&') - logger.debug('Getting the author page: {0}'.format(author_Url)) - adata = self.get_request(author_Url) - - if 'Invalid authorid' in adata: - raise exceptions.StoryDoesNotExist('{0} says: Invalid authorid'.format(self.getSiteDomain())) - - asoup = self.make_soup(adata) - - lc2 = asoup.find('a', href=re.compile(r'read.php\?storyid='+self.story.getMetadata('storyId'))) - lc2 = lc2.findPrevious('table') - summry = stripHTML(lc2.find('td',{'class':'highlightcolor2'})).strip() - self.setDescription(url,summry) - - lupdt = lc2.findAll('td',{'class':'highlightcolor1'})[1].string.replace('Last updated','').strip() - self.story.setMetadata('dateUpdated', makeDate(lupdt, self.dateformat)) - - self._setURL('http://' + self.getSiteDomain() + '/read.php?storyid='+self.story.getMetadata('storyId')+'&chapno=1') - ## and that is all of the metadata that is on this site... - - # grab the text for an individual chapter. - def getChapterText(self, url): - - logger.debug('Getting chapter text from: %s' % url) - - soup = self.make_soup(self.get_request(url)) - - chap = soup.find('td',{'class':'content_pane'}) - - if chap is None: - raise exceptions.FailedToDownload("Error downloading Chapter: %s! Missing required element!" % url) - - ## some chapters have a table at the beginning, which we shall remove. - for tag in chap.findAll('table'): - #tag.extract() - tag.decompose() - - ## some stories have extra formatting... going to try to remove as much as possible. - for tag in chap.findAll('style') + chap.findAll("o:p"): - #tag.extract() - tag.decompose() - - #strip comments from soup - [comment.extract() for comment in chap.findAll(text=lambda text:isinstance(text, Comment))] - - ## these tags seem to cause the h - for tag in chap.findAll('o:smarttagtype'): - tag.name = 'span' - tag.attrs = None #delte all attributes - - for tag in chap.findAll('p') + chap.findAll('b') + chap.findAll('i') + chap.findAll('em') + chap.findAll('strong') + chap.findAll('span'): - tag.attrs = None #delte all attributes - if tag.string == '=': - tag.replace_with("'") - - for tag in chap.findAll('span'): - tag.attrs = None #delte all attributes - if tag.findAll(True) is None: - if tag.string == '=': - tag.replace_with("'") - - chap.name = 'div' # don't want a bare without a table. - return self.utf8FromSoup(url,chap) diff --git a/fanficfare/defaults.ini b/fanficfare/defaults.ini index 89461af1..719fd63d 100644 --- a/fanficfare/defaults.ini +++ b/fanficfare/defaults.ini @@ -3673,9 +3673,6 @@ extracategories:Lord of the Rings website_encodings:Windows-1252,utf8 -[www.sugarquill.net] -use_basic_cache:true - [www.swi.org.ru] use_basic_cache:true