1
0
Fork 0
mirror of https://github.com/kemayo/leech synced 2026-04-20 11:30:48 +02:00

Helper for URL-joining

This commit is contained in:
David Lynch 2019-05-29 01:55:35 -05:00
parent 4d93f84efa
commit 2bd5d77715
4 changed files with 10 additions and 9 deletions

View file

@ -5,6 +5,7 @@ import os
import uuid
import time
import logging
import urllib
import attr
from bs4 import BeautifulSoup
@ -150,6 +151,9 @@ class Site:
soup = BeautifulSoup("", 'html5lib')
return soup.new_tag(*args, **kw)
def _join_url(self, *args, **kwargs):
return urllib.parse.urljoin(*args, **kwargs)
def _footnote(self, contents, chapterid):
"""Register a footnote and return a link to that footnote"""

View file

@ -3,7 +3,6 @@
import logging
import datetime
import re
import urllib
import requests_cache
from bs4 import BeautifulSoup
from . import register, Site, Section, Chapter
@ -38,7 +37,7 @@ class ArchiveOfOurOwn(Site):
# I feel the session *should* handle this cookies bit for me. But
# it doesn't. And I don't know why.
self.session.post(
urllib.parse.urljoin(login.url, str(form.get('action'))),
self._join_url(login.url, str(form.get('action'))),
data=post, cookies=login.cookies
)
logger.info("Logged in as %s", login_details[0])

View file

@ -5,7 +5,6 @@ import attr
import datetime
import json
import os.path
import urllib
from . import register, Site, Section, Chapter
logger = logging.getLogger(__name__)
@ -70,8 +69,8 @@ class Arbitrary(Site):
for chapter_link in soup.select(definition.chapter_selector):
chapter_url = str(chapter_link.get('href'))
if base:
chapter_url = urllib.parse.urljoin(base, chapter_url)
chapter_url = urllib.parse.urljoin(definition.url, chapter_url)
chapter_url = self._join_url(base, chapter_url)
chapter_url = self._join_url(definition.url, chapter_url)
for chapter in self._chapter(chapter_url, definition, title=chapter_link.string):
story.add(chapter)
else:
@ -86,8 +85,8 @@ class Arbitrary(Site):
if next_link:
next_link_url = str(next_link[0].get('href'))
if base:
next_link_url = urllib.parse.urljoin(base, next_link_url)
content_url = urllib.parse.urljoin(content_url, next_link_url)
next_link_url = self._join_url(base, next_link_url)
content_url = self._join_url(content_url, next_link_url)
else:
content_url = False
else:

View file

@ -4,7 +4,6 @@ import http.client
import logging
import datetime
import re
import urllib
from . import register, Site, Section, Chapter
logger = logging.getLogger(__name__)
@ -38,7 +37,7 @@ class RoyalRoad(Site):
)
for chapter in soup.select('#chapters tbody tr[data-url]'):
chapter_url = str(urllib.parse.urljoin(story.url, str(chapter.get('data-url'))))
chapter_url = str(self._join_url(story.url, str(chapter.get('data-url'))))
contents, updated = self._chapter(chapter_url)