mirror of
https://github.com/JimmXinu/FanFicFare.git
synced 2026-01-02 14:13:03 +01:00
feat: add chireads.com adapter and tests on this one
This commit is contained in:
parent
c92effa01b
commit
1669c06703
4 changed files with 933 additions and 0 deletions
|
|
@ -169,6 +169,7 @@ from . import adapter_fanficsme
|
|||
from . import adapter_archivehpfanfictalkcom
|
||||
from . import adapter_scifistoriescom
|
||||
from . import adapter_silmarillionwritersguildorg
|
||||
from . import adapter_chireadscom
|
||||
|
||||
## This bit of complexity allows adapters to be added by just adding
|
||||
## importing. It eliminates the long if/else clauses we used to need
|
||||
|
|
|
|||
120
fanficfare/adapters/adapter_chireadscom.py
Normal file
120
fanficfare/adapters/adapter_chireadscom.py
Normal file
|
|
@ -0,0 +1,120 @@
|
|||
# -*- coding: utf-8 -*-
|
||||
|
||||
# Copyright 2019 FanFicFare team
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
#
|
||||
|
||||
|
||||
from __future__ import absolute_import
|
||||
import logging
|
||||
import re
|
||||
# py2 vs py3 transition
|
||||
from ..six import text_type as unicode, ensure_text
|
||||
from ..six.moves.urllib import parse as urlparse
|
||||
from ..six.moves.urllib.error import HTTPError
|
||||
|
||||
from .base_adapter import BaseSiteAdapter, makeDate
|
||||
from fanficfare.htmlcleanup import stripHTML
|
||||
from .. import exceptions as exceptions
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
|
||||
def getClass():
    """Return the site adapter class defined in this module."""
    return ChireadsComSiteAdapter
|
||||
|
||||
|
||||
class ChireadsComSiteAdapter(BaseSiteAdapter):
    """Site adapter for stories under ``/category/translatedtales/`` on chireads.com.

    The path segment following ``/category/translatedtales/`` is used as the
    story id, and the story URL is normalised to
    ``https://chireads.com/category/translatedtales/<id>/``.
    """

    # Not referenced by any method of this class; kept so existing attribute
    # access from outside keeps working.
    NEW_DATE_FORMAT = '%Y/%m/%d %H:%M:%S'
    OLD_DATE_FORMAT = '%m/%d/%Y %I:%M:%S %p'

    def __init__(self, config, url):
        """Validate ``url``, record the story id and set the normalised story URL.

        Raises:
            exceptions.InvalidStoryURL: if ``url`` does not match
                ``getSiteURLPattern()``.
        """
        BaseSiteAdapter.__init__(self, config, url)
        self.story.setMetadata('siteabbrev', 'chireads')

        # The story id is the named group ``id`` of the site URL pattern.
        match = re.match(self.getSiteURLPattern(), url)
        if not match:
            raise exceptions.InvalidStoryURL(url, self.getSiteDomain(), self.getSiteExampleURLs())

        story_id = match.group('id')
        self.story.setMetadata('storyId', story_id)
        self._setURL('https://%s/category/translatedtales/%s/' % (self.getSiteDomain(), story_id))

    @staticmethod
    def getSiteDomain():
        """Return the domain name this adapter handles."""
        return 'chireads.com'

    @classmethod
    def getAcceptDomains(cls):
        """Return the list of domains accepted for this adapter."""
        return ['chireads.com']

    @classmethod
    def getSiteExampleURLs(cls):
        """Return an example story URL for this site."""
        return 'https://%s/category/translatedtales/story-name' % cls.getSiteDomain()

    def getSiteURLPattern(self):
        """Return the regex for story URLs; the group ``id`` captures the story slug."""
        return r'https?://chireads\.com/category/translatedtales/(?P<id>[^/]+)(/)?'

    def use_pagecache(self):
        """Return True.

        NOTE(review): presumably opts this adapter into the base class page
        cache -- confirm against BaseSiteAdapter.
        """
        return True

    def extractChapterUrlsAndMetadata(self):
        """Fetch the story page and populate metadata and the chapter list.

        Sets title, author/authorId, cover image, dateUpdated and description,
        then registers one chapter per link found inside ``div#content``.
        Cover, update date and description are skipped when their elements
        are absent instead of aborting the whole download.

        Raises:
            exceptions.StoryDoesNotExist: if the story page answers HTTP 404.
            exceptions.FailedToDownload: if the main info block is missing.
            HTTPError: any other HTTP error is re-raised unchanged.
        """
        logger.debug('URL: %s', self.url)
        try:
            data = self._fetchUrl(self.url)
        except HTTPError as exception:
            if exception.code == 404:
                raise exceptions.StoryDoesNotExist('404 error: {}'.format(self.url))
            # Bare raise keeps the original traceback.
            raise

        soup = self.make_soup(data)
        info = soup.select_one('.inform-inform-data')
        if info is None:
            # Without the info block none of the required metadata can be read.
            raise exceptions.FailedToDownload(
                "Error downloading story metadata: %s! Missing required element!" % self.url)

        # Only the part of the heading before ' | ' is used as the title.
        self.story.setMetadata('title', stripHTML(info.h3).split(' | ')[0])

        cover_img = soup.select_one('.inform-product > img')
        if cover_img is not None and cover_img.get('src'):
            self.setCoverImage(self.url, cover_img['src'])

        # The author is the text before 'Babelcheck' with the label removed.
        # NOTE(review): '\xc2\xa0' is the UTF-8 byte pair of a non-breaking
        # space; on Python 3 text this only matches the two characters
        # U+00C2 U+00A0 -- confirm whether u'\xa0' was intended. An earlier
        # comment here stated the label's colon is not an ASCII ':' on the site.
        author = stripHTML(info.h6).split('Babelcheck')[0].replace('Auteur : ', '').replace('\xc2\xa0', '')
        self.story.setMetadata('author', author)
        # The author name doubles as the author id.
        self.story.setMetadata('authorId', author)

        # The newest chapter's URL ends in .../YYYY/MM/DD/; that date is used
        # as the story's update date. The trailing slash is optional here.
        newest_link = soup.select_one('.newestchapitre > div > a')
        if newest_link is not None and newest_link.get('href'):
            date_match = re.search(r'(\d{4}/\d{2}/\d{2})/?$', stripHTML(newest_link['href']))
            if date_match:
                date = makeDate(date_match.group(1), '%Y/%m/%d')
                if date:
                    self.story.setMetadata('dateUpdated', date)

        intro_block = info.select_one('.inform-inform-txt')
        if intro_block is not None and intro_block.span is not None:
            self.setDescription(self.url, stripHTML(intro_block.span))

        # Every link with an href inside a div#content is treated as a chapter.
        for content in soup.find_all('div', {'id': 'content'}):
            for link in content.find_all('a', href=True):
                self.add_chapter(link.get_text(), link['href'])

    def getChapterText(self, url):
        """Fetch a chapter page and return its ``#content`` element as text.

        Raises:
            exceptions.FailedToDownload: if the page has no ``#content`` element.
        """
        logger.debug('Getting chapter text from: %s', url)

        data = self._fetchUrl(url)
        soup = self.make_soup(data)

        content = soup.select_one('#content')
        if content is None:
            raise exceptions.FailedToDownload("Error downloading Chapter: %s! Missing required element!" % url)

        return self.utf8FromSoup(url, content)
|
||||
143
fanficfare/adapters/tests/adapters/test_adapter_chireadscom.py
Normal file
143
fanficfare/adapters/tests/adapters/test_adapter_chireadscom.py
Normal file
|
|
@ -0,0 +1,143 @@
|
|||
import pytest
|
||||
|
||||
# from unittest import mock
|
||||
from unittest.mock import patch # , call, Mock
|
||||
|
||||
from fanficfare.six.moves.urllib.error import HTTPError
|
||||
|
||||
from fanficfare.adapters.adapter_chireadscom import ChireadsComSiteAdapter as chiread
|
||||
from fanficfare.configurable import Configuration
|
||||
from fanficfare import exceptions
|
||||
|
||||
|
||||
@patch('fanficfare.adapters.adapter_chireadscom.ChireadsComSiteAdapter.setDescription')
@patch('fanficfare.adapters.adapter_chireadscom.ChireadsComSiteAdapter.setCoverImage')
class TestExtractChapterUrlsAndMetadata:
    """Tests for ``ChireadsComSiteAdapter.extractChapterUrlsAndMetadata``.

    ``setCoverImage`` and ``setDescription`` are patched for every test, so
    each test method receives their mocks after any method-level mock.
    ``_fetchUrl`` is patched per test so no test touches the network.
    """

    def setup_method(self):
        self.url = 'https://chireads.com/category/translatedtales/some-story/'
        self.configuration = Configuration(["chireads.com"], "EPUB", lightweight=True)
        self.chireads = chiread(self.configuration, self.url)

    @patch('fanficfare.adapters.adapter_chireadscom.ChireadsComSiteAdapter._fetchUrl')
    def test_raise_404_for_nonexistent_story(self, mockFetchUrl, mock_setCoverImage, mock_setDescription):
        # Given: the fetch fails with HTTP 404 (mocked, no real request).
        mockFetchUrl.side_effect = HTTPError(self.url, 404, 'Not Found', None, None)

        # When / Then
        with pytest.raises(exceptions.StoryDoesNotExist):
            self.chireads.extractChapterUrlsAndMetadata()

    @patch('fanficfare.adapters.adapter_chireadscom.ChireadsComSiteAdapter._fetchUrl')
    def test_get_metadata(self, mockFetchUrl, mock_setCoverImage, mock_setDescription, chireads_html_return):
        # Given
        mockFetchUrl.return_value = chireads_html_return

        # When
        self.chireads.extractChapterUrlsAndMetadata()

        # Then
        assert self.chireads.story.getMetadata('title') == 'Shadow Hack'

    @patch('fanficfare.adapters.adapter_chireadscom.ChireadsComSiteAdapter._fetchUrl')
    def test_get_cover_image(self, mockFetchUrl, mock_setCoverImage, mock_setDescription, chireads_html_return):
        # Given
        mockFetchUrl.return_value = chireads_html_return

        # When
        self.chireads.extractChapterUrlsAndMetadata()

        # Then
        mock_setCoverImage.assert_called_with(self.url, 'https://chireads.com/wp-content/uploads/2020/04/Shadow-Hack-2.jpg')

    @patch('fanficfare.adapters.adapter_chireadscom.ChireadsComSiteAdapter._fetchUrl')
    def test_get_author(self, mockFetchUrl, mock_setCoverImage, mock_setDescription, chireads_html_return):
        # Given
        mockFetchUrl.return_value = chireads_html_return

        # When
        self.chireads.extractChapterUrlsAndMetadata()

        # Then
        assert self.chireads.story.getMetadata('author') == 'Great Lord of Cloudland'
        assert self.chireads.story.getMetadata('authorId') == 'Great Lord of Cloudland'

    @patch('fanficfare.adapters.adapter_chireadscom.ChireadsComSiteAdapter._fetchUrl')
    def test_get_dateUpdated(self, mockFetchUrl, mock_setCoverImage, mock_setDescription, chireads_html_return):
        # Given
        mockFetchUrl.return_value = chireads_html_return

        # When
        self.chireads.extractChapterUrlsAndMetadata()

        # Then
        assert self.chireads.story.getMetadata('dateUpdated') == '2020-06-27'

    @patch('fanficfare.adapters.adapter_chireadscom.ChireadsComSiteAdapter._fetchUrl')
    def test_get_novel_info(self, mockFetchUrl, mock_setCoverImage, mock_setDescription, chireads_html_return):
        # Given
        mockFetchUrl.return_value = chireads_html_return

        # When
        self.chireads.extractChapterUrlsAndMetadata()

        # Then
        expected_intro = "Par coïncidence, Li Yunmu a découvert une super machine de l’ère des ténèbres de l’humanité. À partir de ce moment, sa vie ordinaire ne sera plus jamais la même ! Aptitude ? Talent inné ? Qu’est-ce que c’est ? Ça se mange ? Je n’ai ni aptitude ni compétence innée, mais mon ombre peut monter en niveau en utilisant des Hack. Expérience, points de compétence, prouesse au combat …… .Tous pourraient être Hacké. Même endormi ou fatigué, je pourrais encore améliorer ses compétences. [Ding, ton ombre a tué une fourmi, tu as gagné des points d’expérience et des points d’aptitude.] [Ding, ton ombre a tué une libellule, elle a laissé tomber une boîte dimensionnelle.] Merde, même tuer des insectes peut également augmenter son expérience et obtenir des récompenses.Quoi de mieux !Light novel Shadow Hack en français /Traduction de Shadow Hack en Français / Shadow Hack FrTraduction en français : ZoroBonjour ou bonsoir à tous ! Mon nom est Zoro, j'arrive sur Chiread avec un novel, et quel novel ?! Shadow Hack, du fight, de la chance, du cheaté, et j'en passe. Vraiment, un gros kiff à lire à tout prix !"
        mock_setDescription.assert_called_with(self.url, expected_intro)

    # Previously also named ``test_get_novel_info``, which shadowed the
    # description test above so that it was never collected.
    @patch('fanficfare.adapters.adapter_chireadscom.ChireadsComSiteAdapter._fetchUrl')
    def test_get_chapters(self, mockFetchUrl, mock_setCoverImage, mock_setDescription, chireads_html_return):
        # Given
        mockFetchUrl.return_value = chireads_html_return

        # When
        self.chireads.extractChapterUrlsAndMetadata()

        # Then
        chapters = self.chireads.get_chapters()
        assert chapters[0]['title'] == 'Chapitre 1 – Ombre mystérieuse'
        assert chapters[0]['url'] == 'https://chireads.com/translatedtales/chapitre-1-ombre-mysterieuse/2020/02/08/'
        assert chapters[10]['title'] == 'Chapitre 11 – Bataille injuste'
        assert chapters[10]['url'] == 'https://chireads.com/translatedtales/chapitre-11-bataille-injuste/2020/02/08/'
        assert chapters[100]['title'] == 'Chapitre 101 – La rancune de sœur Noujie'
        assert chapters[100]['url'] == 'https://chireads.com/translatedtales/chapitre-101-la-rancune-de-soeur-noujie/2020/02/08/'

    @patch('fanficfare.adapters.adapter_chireadscom.ChireadsComSiteAdapter._fetchUrl')
    def test_get_novel_info_when_book(self, mockFetchUrl, mock_setCoverImage, mock_setDescription, chireads_html_with_chapters_return):
        # Given
        mockFetchUrl.return_value = chireads_html_with_chapters_return

        # When
        self.chireads.extractChapterUrlsAndMetadata()

        # Then
        chapters = self.chireads.get_chapters()
        assert chapters[0]['title'] == 'Chapitre 01 : Matinée au village'
        assert chapters[0]['url'] == 'https://chireads.com/sur-le-web/chapitre-01-matinee-au-village/2017/08/17/'
        assert chapters[23]['title'] == 'Chapitre 1 : La créature magique, Souris Fantôme'
        assert chapters[23]['url'] == 'https://chireads.com/sur-le-web/chapitre-1-la-creature-magique-souris-fantome/2017/08/18/'
|
||||
|
||||
|
||||
class TestGetChapterText:
    """Tests for ``ChireadsComSiteAdapter.getChapterText``."""

    def setup_method(self):
        self.url = 'https://chireads.com/category/translatedtales/some-story/'
        self.chapter_url = 'https://chireads.com/translatedtales/chapitre-1-some-title/2020/02/08/'
        self.configuration = Configuration(["chireads.com"], "EPUB", lightweight=True)
        self.chireads = chiread(self.configuration, self.url)

    # Renamed from ``test_get_metadata``: this test exercises getChapterText,
    # not metadata extraction.
    @patch('fanficfare.adapters.adapter_chireadscom.ChireadsComSiteAdapter._fetchUrl')
    def test_get_chapter_text(self, mockFetchUrl, chireads_html_chapter_return):
        # Given
        mockFetchUrl.return_value = chireads_html_chapter_return

        # When
        response = self.chireads.getChapterText(self.chapter_url)

        # Then: each sampled paragraph appears in the returned chapter text.
        expected = [
            "Dans une petite pièce chaude, alors que Li Yunmu avait allumé son ordinateur, cette phrase est soudainement apparue devant lui.",
            "Un véritable ordinateur de l’ère sombre aurait certainement un prix de départ de pièces de la cinquième dimension.",
            "Comment un citoyen aussi vulgaire que lui-même pourrait-il avoir les qualifications nécessaires pour accéder aux pièces de la cinquième dimension de l’Alliance ?",
            "À ce moment, la silhouette de Li Yunmu est devenue sans vie !",
        ]
        for paragraph in expected:
            assert paragraph in response
|
||||
669
fanficfare/adapters/tests/conftest.py
Normal file
669
fanficfare/adapters/tests/conftest.py
Normal file
File diff suppressed because one or more lines are too long
Loading…
Reference in a new issue