From 60dc17c32c951c1c6a51a67875d4b063293ee54d Mon Sep 17 00:00:00 2001
From: Jim Miller
Date: Tue, 5 Jun 2012 23:45:03 -0500
Subject: [PATCH] Fixes for fanfiction.net's latest changes.
---
.../adapters/adapter_fanfictionnet.py | 91 ++++++++-----------
fanficdownloader/story.py | 7 +-
index.html | 4 +
3 files changed, 50 insertions(+), 52 deletions(-)
diff --git a/fanficdownloader/adapters/adapter_fanfictionnet.py b/fanficdownloader/adapters/adapter_fanfictionnet.py
index a536c7ab..47996c7d 100644
--- a/fanficdownloader/adapters/adapter_fanfictionnet.py
+++ b/fanficdownloader/adapters/adapter_fanfictionnet.py
@@ -23,9 +23,14 @@ import time
from .. import BeautifulSoup as bs
from .. import exceptions as exceptions
+from ..htmlcleanup import stripHTML
from base_adapter import BaseSiteAdapter, makeDate
+ffnetgenres=["Adventure", "Angst", "Crime", "Drama", "Family", "Fantasy", "Friendship", "General",
+ "Horror", "Humor", "Hurt-Comfort", "Mystery", "Parody", "Poetry", "Romance", "Sci-Fi",
+ "Spiritual", "Supernatural", "Suspense", "Tragedy", "Western"]
+
class FanFictionNetSiteAdapter(BaseSiteAdapter):
def __init__(self, config, url):
@@ -198,61 +203,45 @@ class FanFictionNetSiteAdapter(BaseSiteAdapter):
## Pull some additional data from html. Find Rating and look around it.
a = soup.find('a', href='http://www.fictionratings.com/')
- self.story.setMetadata('rating',a.string)
+ rating = a.string
+ if 'Fiction' in rating: # if rating has 'Fiction ', strip that out for consistency with past.
+ rating = rating[8:]
+
+ self.story.setMetadata('rating',rating)
- # used below to get correct characters.
- metatext = a.findNext(text=re.compile(r' - Reviews:'))
- if metatext == None: # indicates there's no Reviews, look for id: instead.
- metatext = a.findNext(text=re.compile(r' - id:'))
-
- m = re.match(r" - (?P<lang>[^ ]+)",metatext)
- if m.group('lang') != None:
- self.story.setMetadata('language',m.group('lang'))
-
# after Rating, the same bit of text containing id:123456 contains
# Complete--if completed.
- if 'Complete' in soup.find(text=re.compile(r'id:'+self.story.getMetadata('storyId'))):
+ gui_table1i = soup.find(id="gui_table1i")
+ metatext = stripHTML(gui_table1i.find('div', {'style':'color:gray;'})).replace('Hurt/Comfort','Hurt-Comfort')
+ metalist = metatext.split(" - ")
+ #print("metatext:(%s)"%metalist)
+
+ # rating is obtained above more robustly.
+ if metalist[0].startswith('Rated:'):
+ metalist=metalist[1:]
+
+ # next is assumed to be language.
+ self.story.setMetadata('language',metalist[0])
+ metalist=metalist[1:]
+
+ # next might be genre.
+ genrelist = metalist[0].split('/') # Hurt/Comfort already changed above.
+ goodgenres=True
+ for g in genrelist:
+ if g not in ffnetgenres:
+ goodgenres=False
+ if goodgenres:
+ self.story.extendList('genre',genrelist)
+ metalist=metalist[1:]
+
+ # next might be characters, otherwise Reviews, Updated or Published
+ if not ( metalist[0].startswith('Reviews') or metalist[0].startswith('Updated') or metalist[0].startswith('Published') ):
+ self.story.extendList('characters',metalist[0].split(' & '))
+
+ if 'Status: Complete' in metatext:
self.story.setMetadata('status', 'Completed')
else:
self.story.setMetadata('status', 'In-Progress')
-
-
- # Parse genre(s) from
- #
- # (fp)
- #
- #
- #
- # Chapter 1 of a SpongeBob SquarePants - Romance/Humor fanfiction with characters SpongeBob. Bob Esponja tiene un admirador secreto ¿quien será?. update existing id:1684
- m = re.match(r"^(?:Chapter \d+ of a|A) (?:.*?) ?(?:- (?P<genres>.*?) )?(?:crossover )?(?:fan)?fiction(?P<chars>[ ]+with characters)?",
- soup.find('meta',{'name':'description'})['content'])
- #print("meta desc:%s"%soup.find('meta',{'name':'description'})['content'])
- if m != None:
- genres=m.group('genres')
- if genres != None:
- # Hurt/Comfort is one genre.
- genres=re.sub('Hurt/Comfort','Hurt-Comfort',genres)
- for g in genres.split('/'):
- self.story.addToList('genre',g)
-
- if m.group('chars') != None:
-
- # At this point we've proven that there's character(s)
- # We can't reliably parse characters out of meta name="description".
- # There's no way to tell that "with characters Ichigo K. & Neliel T. O./Nel. " ends at "Nel.", not "T."
- # But we can pull them from the reviewstext line, now that we know about existance of chars.
- # reviewstext can take form of:
- # - English - Shinji H. - Updated: 01-13-12 - Published: 12-20-11 - id:7654123
- # - English - Adventure/Angst - Ichigo K. & Neliel T. O./Nel - Reviews:
- # - English - Humor/Adventure - Harry P. & Ironhide - Reviews:
- # - Spanish - Romance/Humor - SpongeBob - Reviews:
- #print("metatext:%s"%metatext)
- mc = re.match(r" - (?P<lang>[^ ]+ - )(?P<genres>[^ ]+ - )? ?(?P<chars>.+?) - (Reviews|Updated|Published)",
- metatext)
- chars = mc.group("chars")
- for c in chars.split('&'):
- self.story.addToList('characters',c.strip())
return
def getChapterText(self, url):
@@ -269,10 +258,10 @@ class FanFictionNetSiteAdapter(BaseSiteAdapter):
if sharediv:
sharediv.extract()
- div = soup.find('div', {'id' : 'storytext'})
+ div = soup.find('div', {'id' : 'storytextp'})
if None == div:
- logging.debug('div id=storytext not found. data:%s'%data)
+ logging.debug('div id=storytextp not found. data:%s'%data)
raise exceptions.FailedToDownload("Error downloading Chapter: %s! Missing required element!" % url)
return self.utf8FromSoup(url,div)
diff --git a/fanficdownloader/story.py b/fanficdownloader/story.py
index 7dc7bdb0..d6b5f881 100644
--- a/fanficdownloader/story.py
+++ b/fanficdownloader/story.py
@@ -252,7 +252,12 @@ class Story:
allmetadata[l] = self.getMetadata(l, removeallentities, doreplacements)
return allmetadata
-
+
+ # just for less clutter in adapters.
+ def extendList(self,listname,l):
+ for v in l:
+ self.addToList(listname,v)
+
def addToList(self,listname,value):
if value==None:
return
diff --git a/index.html b/index.html
index 2270782a..cc9a3cdf 100644
--- a/index.html
+++ b/index.html
@@ -54,6 +54,10 @@
much easier.
+ fanfiction.net fixed
+
+ I hope. I'm parsing it a different way now that I hope will work in all cases. Story text should be fine, but keep an eye on the metadata.
+
New Sites
New sites www.dokuga.com, www.ik-eternal.net added. Thanks, Ida.