feat: add chireads.com adapter and tests for it

This commit is contained in:
Kolbo 2020-07-05 11:15:33 +02:00 committed by Jim Miller
parent c92effa01b
commit 1669c06703
4 changed files with 933 additions and 0 deletions

View file

@ -169,6 +169,7 @@ from . import adapter_fanficsme
from . import adapter_archivehpfanfictalkcom
from . import adapter_scifistoriescom
from . import adapter_silmarillionwritersguildorg
from . import adapter_chireadscom
## This bit of complexity allows adapters to be added by just adding
## an import. It eliminates the long if/else clauses we used to need

View file

@ -0,0 +1,120 @@
# -*- coding: utf-8 -*-
# Copyright 2019 FanFicFare team
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#
from __future__ import absolute_import
import logging
import re
# py2 vs py3 transition
from ..six import text_type as unicode, ensure_text
from ..six.moves.urllib import parse as urlparse
from ..six.moves.urllib.error import HTTPError
from .base_adapter import BaseSiteAdapter, makeDate
from fanficfare.htmlcleanup import stripHTML
from .. import exceptions as exceptions
logger = logging.getLogger(__name__)
def getClass():
    """Return the site adapter class defined in this module."""
    adapter_class = ChireadsComSiteAdapter
    return adapter_class
class ChireadsComSiteAdapter(BaseSiteAdapter):
    """Site adapter for stories hosted on chireads.com.

    Story URLs have the form
    ``https://chireads.com/category/translatedtales/<story-name>/``; the
    ``<story-name>`` path segment is used as the story id.
    """

    # NOTE(review): neither constant is referenced inside this class (the
    # update date is parsed with '%Y/%m/%d').  Kept so that any external
    # reader of these attributes keeps working -- confirm before removing.
    NEW_DATE_FORMAT = '%Y/%m/%d %H:%M:%S'
    OLD_DATE_FORMAT = '%m/%d/%Y %I:%M:%S %p'

    def __init__(self, config, url):
        """Validate *url*, record the story id and set the canonical URL.

        Raises:
            exceptions.InvalidStoryURL: if *url* does not match
                ``getSiteURLPattern()``.
        """
        BaseSiteAdapter.__init__(self, config, url)
        self.story.setMetadata('siteabbrev', 'chireads')

        # The story id is the path segment captured by the URL pattern.
        match = re.match(self.getSiteURLPattern(), url)
        if not match:
            raise exceptions.InvalidStoryURL(url, self.getSiteDomain(), self.getSiteExampleURLs())

        story_id = match.group('id')
        self.story.setMetadata('storyId', story_id)
        # Normalize to https with a trailing slash.
        self._setURL('https://%s/category/translatedtales/%s/' % (self.getSiteDomain(), story_id))

    @staticmethod
    def getSiteDomain():
        """Return the primary domain served by this adapter."""
        return 'chireads.com'

    @classmethod
    def getAcceptDomains(cls):
        """Return the list of domains this adapter accepts."""
        return ['chireads.com']

    @classmethod
    def getSiteExampleURLs(cls):
        """Return an example story URL for this site."""
        return 'https://%s/category/translatedtales/story-name' % cls.getSiteDomain()

    def getSiteURLPattern(self):
        """Return the regex for story URLs; the ``id`` group is the story name."""
        return r'https?://chireads\.com/category/translatedtales/(?P<id>[^/]+)(/)?'

    def use_pagecache(self):
        """Return True.

        NOTE(review): presumably tells the base adapter to cache fetched
        pages -- confirm against BaseSiteAdapter.
        """
        return True

    def extractChapterUrlsAndMetadata(self):
        """Fetch the story page and populate metadata and the chapter list.

        Sets title, author, authorId and, when available, the cover image,
        dateUpdated and description.  Every link with an ``href`` inside a
        ``<div id="content">`` is added as a chapter.

        Raises:
            exceptions.StoryDoesNotExist: the story page returned HTTP 404.
            exceptions.FailedToDownload: the page lacks the story info block.
            HTTPError: any other HTTP failure is propagated unchanged.
        """
        logger.debug('URL: %s', self.url)

        try:
            data = self._fetchUrl(self.url)
        except HTTPError as exception:
            if exception.code == 404:
                raise exceptions.StoryDoesNotExist('404 error: {}'.format(self.url))
            # Bare raise keeps the original traceback intact.
            raise

        soup = self.make_soup(data)

        info = soup.select_one('.inform-inform-data')
        if info is None:
            # Fail with a clear message instead of an AttributeError on None.
            raise exceptions.FailedToDownload(
                "Error downloading story: %s! Missing required element!" % self.url)

        # Only the part of the heading before ' | ' is used as the title.
        self.story.setMetadata('title', stripHTML(info.h3).split(' | ')[0])

        # The cover is optional: skip it if the image is missing.
        cover = soup.select_one('.inform-product > img')
        if cover is not None and cover.get('src'):
            self.setCoverImage(self.url, cover['src'])

        # Keep only the text before 'Babelcheck' and drop the 'Auteur : '
        # label.  Unicode literals are used so the replacements behave the
        # same on py2 and py3: u'\xa0' is the real non-breaking space, while
        # u'\xc2\xa0' is what the original py3 code removed (the mis-decoded
        # form) and is kept for backward compatibility.
        author = stripHTML(info.h6).split(u'Babelcheck')[0].replace(u'Auteur : ', u'')
        author = author.replace(u'\xc2\xa0', u'').replace(u'\xa0', u'').strip()
        self.story.setMetadata('author', author)
        self.story.setMetadata('authorId', author)

        # The newest chapter link ends with its date, e.g. '.../2020/06/27/'.
        # A regex tolerates a missing trailing slash, unlike a fixed slice.
        newest = soup.select_one('.newestchapitre > div > a')
        if newest is not None:
            found = re.search(r'(\d{4}/\d{2}/\d{2})/?$', newest.get('href', '').strip())
            if found:
                date = makeDate(found.group(1), '%Y/%m/%d')
                if date:
                    self.story.setMetadata('dateUpdated', date)

        intro_box = info.select_one('.inform-inform-txt')
        if intro_box is not None and intro_box.span is not None:
            self.setDescription(self.url, stripHTML(intro_box.span))

        for content in soup.find_all('div', {'id': 'content'}):
            # href=True skips anchors that have no link target.
            for a in content.find_all('a', href=True):
                self.add_chapter(a.get_text(), a['href'])

    def getChapterText(self, url):
        """Download the chapter at *url* and return its ``#content`` element
        as processed by ``utf8FromSoup``.

        Raises:
            exceptions.FailedToDownload: the page has no ``#content`` element.
        """
        logger.debug('Getting chapter text from: %s', url)

        soup = self.make_soup(self._fetchUrl(url))
        content = soup.select_one('#content')
        if content is None:
            raise exceptions.FailedToDownload("Error downloading Chapter: %s! Missing required element!" % url)

        return self.utf8FromSoup(url, content)

View file

@ -0,0 +1,143 @@
import pytest
# from unittest import mock
from unittest.mock import patch # , call, Mock
from fanficfare.six.moves.urllib.error import HTTPError
from fanficfare.adapters.adapter_chireadscom import ChireadsComSiteAdapter as chiread
from fanficfare.configurable import Configuration
from fanficfare import exceptions
@patch('fanficfare.adapters.adapter_chireadscom.ChireadsComSiteAdapter.setDescription')
@patch('fanficfare.adapters.adapter_chireadscom.ChireadsComSiteAdapter.setCoverImage')
class TestExtractChapterUrlsAndMetadata:
    """Tests for ChireadsComSiteAdapter.extractChapterUrlsAndMetadata.

    setCoverImage and setDescription are patched for every test.  Each test
    also patches _fetchUrl so that no network request is made; mocks are
    passed innermost decorator first.
    """

    def setup_method(self):
        self.url = 'https://chireads.com/category/translatedtales/some-story/'
        self.configuration = Configuration(["chireads.com"], "EPUB", lightweight=True)
        self.chireads = chiread(self.configuration, self.url)

    @patch('fanficfare.adapters.adapter_chireadscom.ChireadsComSiteAdapter._fetchUrl')
    def test_raise_404_for_nonexistent_story(self, mockFetchUrl, mock_setCoverImage, mock_setDescription):
        # Given: the site answers 404 (simulated, so the test stays offline)
        mockFetchUrl.side_effect = HTTPError(self.url, 404, 'Not Found', None, None)

        # When / Then
        with pytest.raises(exceptions.StoryDoesNotExist):
            self.chireads.extractChapterUrlsAndMetadata()

    @patch('fanficfare.adapters.adapter_chireadscom.ChireadsComSiteAdapter._fetchUrl')
    def test_get_metadata(self, mockFetchUrl, mock_setCoverImage, mock_setDescription, chireads_html_return):
        # Given
        mockFetchUrl.return_value = chireads_html_return

        # When
        self.chireads.extractChapterUrlsAndMetadata()

        # Then
        assert self.chireads.story.getMetadata('title') == 'Shadow Hack'

    @patch('fanficfare.adapters.adapter_chireadscom.ChireadsComSiteAdapter._fetchUrl')
    def test_get_cover_image(self, mockFetchUrl, mock_setCoverImage, mock_setDescription, chireads_html_return):
        # Given
        mockFetchUrl.return_value = chireads_html_return

        # When
        self.chireads.extractChapterUrlsAndMetadata()

        # Then
        mock_setCoverImage.assert_called_with(self.url, 'https://chireads.com/wp-content/uploads/2020/04/Shadow-Hack-2.jpg')

    @patch('fanficfare.adapters.adapter_chireadscom.ChireadsComSiteAdapter._fetchUrl')
    def test_get_author(self, mockFetchUrl, mock_setCoverImage, mock_setDescription, chireads_html_return):
        # Given
        mockFetchUrl.return_value = chireads_html_return

        # When
        self.chireads.extractChapterUrlsAndMetadata()

        # Then
        assert self.chireads.story.getMetadata('author') == 'Great Lord of Cloudland'
        assert self.chireads.story.getMetadata('authorId') == 'Great Lord of Cloudland'

    @patch('fanficfare.adapters.adapter_chireadscom.ChireadsComSiteAdapter._fetchUrl')
    def test_get_dateUpdated(self, mockFetchUrl, mock_setCoverImage, mock_setDescription, chireads_html_return):
        # Given
        mockFetchUrl.return_value = chireads_html_return

        # When
        self.chireads.extractChapterUrlsAndMetadata()

        # Then
        assert self.chireads.story.getMetadata('dateUpdated') == '2020-06-27'

    @patch('fanficfare.adapters.adapter_chireadscom.ChireadsComSiteAdapter._fetchUrl')
    def test_get_novel_info(self, mockFetchUrl, mock_setCoverImage, mock_setDescription, chireads_html_return):
        # Given
        mockFetchUrl.return_value = chireads_html_return

        # When
        self.chireads.extractChapterUrlsAndMetadata()

        # Then
        expected_intro = "Par coïncidence, Li Yunmu a découvert une super machine de lère des ténèbres de lhumanité. À partir de ce moment, sa vie ordinaire ne sera plus jamais la même ! Aptitude ? Talent inné ? Quest-ce que cest ? Ça se mange ? Je nai ni aptitude ni compétence innée, mais mon ombre peut monter en niveau en utilisant des Hack. Expérience, points de compétence, prouesse au combat …… .Tous pourraient être Hacké. Même endormi ou fatigué, je pourrais encore améliorer ses compétences. [Ding, ton ombre a tué une fourmi, tu as gagné des points dexpérience et des points daptitude.] [Ding, ton ombre a tué une libellule, elle a laissé tomber une boîte dimensionnelle.] Merde, même tuer des insectes peut également augmenter son expérience et obtenir des récompenses.Quoi de mieux !Light novel Shadow Hack en français /Traduction de Shadow Hack en Français / Shadow Hack FrTraduction en français : ZoroBonjour ou bonsoir à tous ! Mon nom est Zoro, j'arrive sur Chiread avec un novel, et quel novel ?! Shadow Hack, du fight, de la chance, du cheaté, et j'en passe. Vraiment, un gros kiff à lire à tout prix !"
        mock_setDescription.assert_called_with(self.url, expected_intro)

    # Previously also named test_get_novel_info, which shadowed the
    # description test above so that it never ran.
    @patch('fanficfare.adapters.adapter_chireadscom.ChireadsComSiteAdapter._fetchUrl')
    def test_get_chapters(self, mockFetchUrl, mock_setCoverImage, mock_setDescription, chireads_html_return):
        # Given
        mockFetchUrl.return_value = chireads_html_return

        # When
        self.chireads.extractChapterUrlsAndMetadata()

        # Then
        chapters = self.chireads.get_chapters()
        assert chapters[0]['title'] == 'Chapitre 1 Ombre mystérieuse'
        assert chapters[0]['url'] == 'https://chireads.com/translatedtales/chapitre-1-ombre-mysterieuse/2020/02/08/'
        assert chapters[10]['title'] == 'Chapitre 11 Bataille injuste'
        assert chapters[10]['url'] == 'https://chireads.com/translatedtales/chapitre-11-bataille-injuste/2020/02/08/'
        assert chapters[100]['title'] == 'Chapitre 101 La rancune de sœur Noujie'
        assert chapters[100]['url'] == 'https://chireads.com/translatedtales/chapitre-101-la-rancune-de-soeur-noujie/2020/02/08/'

    @patch('fanficfare.adapters.adapter_chireadscom.ChireadsComSiteAdapter._fetchUrl')
    def test_get_novel_info_when_book(self, mockFetchUrl, mock_setCoverImage, mock_setDescription, chireads_html_with_chapters_return):
        # Given
        mockFetchUrl.return_value = chireads_html_with_chapters_return

        # When
        self.chireads.extractChapterUrlsAndMetadata()

        # Then
        chapters = self.chireads.get_chapters()
        assert chapters[0]['title'] == 'Chapitre 01 : Matinée au village'
        assert chapters[0]['url'] == 'https://chireads.com/sur-le-web/chapitre-01-matinee-au-village/2017/08/17/'
        assert chapters[23]['title'] == 'Chapitre 1 : La créature magique, Souris Fantôme'
        assert chapters[23]['url'] == 'https://chireads.com/sur-le-web/chapitre-1-la-creature-magique-souris-fantome/2017/08/18/'
class TestGetChapterText:
    """Tests for ChireadsComSiteAdapter.getChapterText.

    _fetchUrl is patched in every test so no network request is made.
    """

    def setup_method(self):
        self.url = 'https://chireads.com/category/translatedtales/some-story/'
        self.chapter_url = 'https://chireads.com/translatedtales/chapitre-1-some-title/2020/02/08/'
        self.configuration = Configuration(["chireads.com"], "EPUB", lightweight=True)
        self.chireads = chiread(self.configuration, self.url)

    # Renamed from test_get_metadata: this checks chapter text, not metadata.
    @patch('fanficfare.adapters.adapter_chireadscom.ChireadsComSiteAdapter._fetchUrl')
    def test_get_chapter_text(self, mockFetchUrl, chireads_html_chapter_return):
        # Given
        mockFetchUrl.return_value = chireads_html_chapter_return

        # When
        response = self.chireads.getChapterText(self.chapter_url)

        # Then
        expected = [
            "Dans une petite pièce chaude, alors que Li Yunmu avait allumé son ordinateur, cette phrase est soudainement apparue devant lui.",
            "Un véritable ordinateur de lère sombre aurait certainement un prix de départ de pièces de la cinquième dimension.",
            "Comment un citoyen aussi vulgaire que lui-même pourrait-il avoir les qualifications nécessaires pour accéder aux pièces de la cinquième dimension de lAlliance ?",
            "À ce moment, la silhouette de Li Yunmu est devenue sans vie !"
        ]
        for p in expected:
            assert p in response

    @patch('fanficfare.adapters.adapter_chireadscom.ChireadsComSiteAdapter._fetchUrl')
    def test_raise_when_content_is_missing(self, mockFetchUrl):
        # Given: a page without the '#content' element
        mockFetchUrl.return_value = '<html><body><p>nothing here</p></body></html>'

        # When / Then
        with pytest.raises(exceptions.FailedToDownload):
            self.chireads.getChapterText(self.chapter_url)

File diff suppressed because one or more lines are too long