diff --git a/calibre-plugin/config.py b/calibre-plugin/config.py index 14693b61..8e3d4dc6 100644 --- a/calibre-plugin/config.py +++ b/calibre-plugin/config.py @@ -445,6 +445,7 @@ permitted_values = { 'series' : ['series'], 'enumeration' : ['category', 'genre', + 'language', 'series', 'characters', 'status', @@ -477,6 +478,7 @@ permitted_values['comments'] = permitted_values['enumeration'] titleLabels = { 'category':'Category', 'genre':'Genre', + 'language':'Language', 'status':'Status', 'status-C':'Status:Completed', 'status-I':'Status:In-Progress', diff --git a/calibre-plugin/ffdl_plugin.py b/calibre-plugin/ffdl_plugin.py index d1254216..19003405 100644 --- a/calibre-plugin/ffdl_plugin.py +++ b/calibre-plugin/ffdl_plugin.py @@ -52,9 +52,6 @@ formmapping = { PLUGIN_ICONS = ['images/icon.png'] -sendlists = ["Send to Nook", "Send to Kindle", "Send to Droid", "Add to Nook", "Add to Kindle", "Add to Droid"] -readlists = ["000"] - class FanFictionDownLoaderPlugin(InterfaceAction): name = 'FanFictionDownLoader' @@ -742,11 +739,16 @@ class FanFictionDownLoaderPlugin(InterfaceAction): if len(filter( lambda x : not x.startswith("Last Update"), mi.tags)) > 0: old_tags = filter( lambda x : not x.startswith("Last Update"), old_tags) # mi.tags needs to be list, but set kills dups. - mi.tags = list(set(list(old_tags)+mi.tags)) - # Set language english, but only if not already set. - oldmi = db.get_metadata(book_id,index_is_id=True) - if not oldmi.languages: - mi.languages=['eng'] + mi.tags = list(set(list(old_tags)+mi.tags)) + + if 'langcode' in book['all_metadata']: + mi.languages=[book['all_metadata']['langcode']] + else: + # Set language english, but only if not already set. + oldmi = db.get_metadata(book_id,index_is_id=True) + if not oldmi.languages: + mi.languages=['eng'] + db.set_metadata(book_id,mi) # do configured column updates here. 
@@ -971,40 +973,6 @@ class FanFictionDownLoaderPlugin(InterfaceAction): except: return None; - - -def get_job_details(job): - ''' - Convert the job result into a set of parameters including a detail message - summarising the success of the extraction operation. - This is used by both the threaded and worker approaches to extraction - ''' - extracted_ids, same_isbn_ids, failed_ids, no_format_ids = job.result - if not hasattr(job, 'html_details'): - job.html_details = job.details - det_msg = [] - for i, title in failed_ids: - if i in no_format_ids: - msg = title + ' (No formats)' - else: - msg = title + ' (ISBN not found)' - det_msg.append(msg) - if same_isbn_ids: - if det_msg: - det_msg.append('----------------------------------') - for i, title in same_isbn_ids: - msg = title + ' (Same ISBN)' - det_msg.append(msg) - if len(extracted_ids) > 0: - if det_msg: - det_msg.append('----------------------------------') - for i, title, last_modified, isbn in extracted_ids: - msg = '%s (Extracted %s)'%(title, isbn) - det_msg.append(msg) - - det_msg = '\n'.join(det_msg) - return extracted_ids, same_isbn_ids, failed_ids, det_msg - def get_url_list(urls): def f(x): if x.strip(): return True diff --git a/defaults.ini b/defaults.ini index 86d583c4..5b623f97 100644 --- a/defaults.ini +++ b/defaults.ini @@ -36,6 +36,7 @@ formatext_label:File Extension ## Sometimes there are multiple categories and/or genres. category_label:Category genre_label:Genre +language_label:Language characters_label:Characters series_label:Series ## Completed/In-Progress @@ -67,7 +68,7 @@ version_label:FFDL Version ## items to include in the title page ## Empty entries will *not* appear, even if in the list. ## All current formats already include title and author. 
-titlepage_entries: series,category,genre,characters,status,datePublished,dateUpdated,dateCreated,rating,warnings,numChapters,numWords,site,description +titlepage_entries: series,category,genre,language,characters,status,datePublished,dateUpdated,dateCreated,rating,warnings,numChapters,numWords,site,description ## Try to collect series name and number of this story in series. ## Some sites (ab)use 'series' for reading lists and personal @@ -176,7 +177,7 @@ output_css: [txt] ## Add URLs since there aren't links. -titlepage_entries: series,category,genre,status,datePublished,dateUpdated,dateCreated,rating,warnings,numChapters,numWords,site,storyUrl, authorUrl, description +titlepage_entries: series,category,genre,language,status,datePublished,dateUpdated,dateCreated,rating,warnings,numChapters,numWords,site,storyUrl, authorUrl, description ## use \r\n for line endings, the windows convention. text output only. windows_eol: true diff --git a/fanficdownloader/adapters/adapter_fanfictionnet.py b/fanficdownloader/adapters/adapter_fanfictionnet.py index 29d65a1c..aa50d420 100644 --- a/fanficdownloader/adapters/adapter_fanfictionnet.py +++ b/fanficdownloader/adapters/adapter_fanfictionnet.py @@ -201,7 +201,6 @@ class FanFictionNetSiteAdapter(BaseSiteAdapter): metatext = a.findNext(text=re.compile(r' - Reviews:')) if metatext == None: # indicates there's no Reviews, look for id: instead. metatext = a.findNext(text=re.compile(r' - id:')) - #print("========= metatext:\n%s"%metatext) # after Rating, the same bit of text containing id:123456 contains # Complete--if completed. @@ -215,7 +214,8 @@ class FanFictionNetSiteAdapter(BaseSiteAdapter): # # (fp) # - m = re.match(r"^(?:Chapter \d+ of a|A) (?:.*?) (?:- (?P<genres>.*?) )?(?:crossover )?(?:fan)?fiction(?:[ ]+with characters (?P<char1>.*?\.?)(?: & (?P<char2>.*?\.?))?\. )?", + # + m = re.match(r"^(?:Chapter \d+ of a|A) (?:.*?) (?:- (?P<genres>.*?) 
)?(?:crossover )?(?:fan)?fiction(?P<chars>[ ]+with characters)?", soup.find('meta',{'name':'description'})['content']) if m != None: genres=m.group('genres') @@ -225,7 +225,8 @@ class FanFictionNetSiteAdapter(BaseSiteAdapter): for g in genres.split('/'): self.story.addToList('genre',g) - if m.group('char1') != None: + if m.group('chars') != None: + # At this point we've proven that there's character(s) # We can't reliably parse characters out of meta name="description". # There's no way to tell that "with characters Ichigo K. & Neliel T. O./Nel. " ends at "Nel.", not "T." @@ -233,12 +234,16 @@ class FanFictionNetSiteAdapter(BaseSiteAdapter): # reviewstext can take form of: # - English - Shinji H. - Updated: 01-13-12 - Published: 12-20-11 - id:7654123 # - English - Adventure/Angst - Ichigo K. & Neliel T. O./Nel - Reviews: + # - English - Humor/Adventure - Harry P. & Ironhide - Reviews: mc = re.match(r" - (?P<lang>[^ ]+ - )(?P<genres>[^ ]+ - )? (?P<chars>.+?) - (Reviews|Updated|Published)", metatext) chars = mc.group("chars") for c in chars.split(' & '): self.story.addToList('characters',c) - + m = re.match(r" - (?P<lang>[^ ]+)",metatext) + if m.group('lang') != None: + self.story.setMetadata('language',m.group('lang')) + return def getChapterText(self, url): diff --git a/fanficdownloader/adapters/adapter_test1.py b/fanficdownloader/adapters/adapter_test1.py index 9ed33178..4fbd6021 100644 --- a/fanficdownloader/adapters/adapter_test1.py +++ b/fanficdownloader/adapters/adapter_test1.py @@ -90,6 +90,16 @@ Some more longer description. "I suck at summaries!" "Better than it sounds!" self.story.setMetadata('status','In-Progress') else: self.story.setMetadata('status','Completed') + + langs = { + 0:"English", + 1:"Russian", + 2:"French", + 3:"German", + } + if idnum < 10: + self.story.setMetadata('language',langs[idnum%len(langs)]) + # greater than 10, no language. self.setSeries('The Great Test',idnum) @@ -117,7 +127,7 @@ Some more longer description. "I suck at summaries!" "Better than it sounds!" 
('Chapter 4',self.url+"&chapter=5"), ('Chapter 5',self.url+"&chapter=6"), ('Chapter 6',self.url+"&chapter=6"), - # ('Chapter 7',self.url+"&chapter=6"), + ('Chapter 7',self.url+"&chapter=6"), # ('Chapter 8',self.url+"&chapter=6"), # ('Chapter 9',self.url+"&chapter=6"), # ('Chapter 0',self.url+"&chapter=6"), diff --git a/fanficdownloader/story.py b/fanficdownloader/story.py index 81336b59..ba3def4a 100644 --- a/fanficdownloader/story.py +++ b/fanficdownloader/story.py @@ -19,6 +19,50 @@ import os, re from htmlcleanup import conditionalRemoveEntities, removeAllEntities +# The list comes from ffnet, the only multi-language site we support +# at the time of writing. Values are taken largely from pycountry, +# but with some corrections and guesses. +langs = { + "English":"en", + "Spanish":"es", + "French":"fr", + "German":"de", + "Chinese":"zh", + "Japanese":"ja", + "Dutch":"nl", + "Portuguese":"pt", + "Russian":"ru", + "Italian":"it", + "Bulgarian":"bg", + "Polish":"pl", + "Hungarian":"hu", + "Hebrew":"he", + "Arabic":"ar", + "Swedish":"sv", + "Norwegian":"no", + "Danish":"da", + "Finnish":"fi", + "Filipino":"fil", + "Esperanto":"eo", + "Hindi":"hi", + "Punjabi":"pa", + "Farsi":"fa", + "Greek":"el", + "Romanian":"ro", + "Albanian":"sq", + "Serbian":"sr", + "Turkish":"tr", + "Czech":"cs", + "Indonesian":"id", + "Croatian":"hr", + "Catalan":"ca", + "Latin":"la", + "Korean":"ko", + "Vietnamese":"vi", + "Thai":"th", + "Devanagari":"hi", + } + class Story: def __init__(self): @@ -33,6 +77,11 @@ class Story: def setMetadata(self, key, value): ## still keeps < < and & self.metadata[key]=conditionalRemoveEntities(value) + if key == "language": + try: + self.metadata['langcode'] = langs[self.metadata[key]] + except: + self.metadata['langcode'] = 'en' def getMetadataRaw(self,key): if self.metadata.has_key(key): @@ -111,7 +160,6 @@ class Story: def setReplace(self,replace): for line in replace.splitlines(): if "=>" in line: - print("line:%s"%line) self.replacements.append(map( 
lambda x: x.strip(), line.split("=>") )) def commaGroups(s): diff --git a/fanficdownloader/writers/base_writer.py b/fanficdownloader/writers/base_writer.py index de1514bd..84a6f5c5 100644 --- a/fanficdownloader/writers/base_writer.py +++ b/fanficdownloader/writers/base_writer.py @@ -52,6 +52,7 @@ class BaseStoryWriter(Configurable): self.validEntries = [ 'category', 'genre', + 'language', 'characters', 'series', 'status', @@ -80,6 +81,7 @@ class BaseStoryWriter(Configurable): self.titleLabels = { 'category':'Category', 'genre':'Genre', + 'language':'Language', 'status':'Status', 'series':'Series', 'characters':'Characters', diff --git a/fanficdownloader/writers/writer_epub.py b/fanficdownloader/writers/writer_epub.py index acd0dcda..e423556d 100644 --- a/fanficdownloader/writers/writer_epub.py +++ b/fanficdownloader/writers/writer_epub.py @@ -203,7 +203,10 @@ class EpubWriter(BaseStoryWriter): metadata.appendChild(newTag(contentdom,"dc:contributor",text="fanficdownloader [http://fanficdownloader.googlecode.com]",attrs={"opf:role":"bkp"})) metadata.appendChild(newTag(contentdom,"dc:rights",text="")) - metadata.appendChild(newTag(contentdom,"dc:language",text="en")) + if self.story.getMetadata('langcode') != None: + metadata.appendChild(newTag(contentdom,"dc:language",text=self.story.getMetadata('langcode'))) + else: + metadata.appendChild(newTag(contentdom,"dc:language",text='en')) # published, created, updated, calibre # Leave calling self.story.getMetadataRaw directly in case date format changes. @@ -399,4 +402,4 @@ def newTag(dom,name,attrs=None,text=None): if( text is not None ): tag.appendChild(dom.createTextNode(text)) return tag - + diff --git a/plugin-defaults.ini b/plugin-defaults.ini index 3152ed6e..55eed582 100644 --- a/plugin-defaults.ini +++ b/plugin-defaults.ini @@ -41,6 +41,7 @@ formatext_label:File Extension ## Sometimes there are multiple categories and/or genres. 
category_label:Category genre_label:Genre +language_label:Language characters_label:Characters series_label:Series ## Completed/In-Progress @@ -72,7 +73,7 @@ version_label:FFDL Version ## items to include in the title page ## Empty entries will *not* appear, even if in the list. ## All current formats already include title and author. -titlepage_entries: series,category,genre,characters,status,datePublished,dateUpdated,dateCreated,rating,warnings,numChapters,numWords,site,description +titlepage_entries: series,category,genre,language,characters,status,datePublished,dateUpdated,dateCreated,rating,warnings,numChapters,numWords,site,description ## Try to collect series name and number of this story in series. ## Some sites (ab)use 'series' for reading lists and personal @@ -154,7 +155,7 @@ output_css: [txt] ## Add URLs since there aren't links. -titlepage_entries: series,category,genre,status,datePublished,dateUpdated,dateCreated,rating,warnings,numChapters,numWords,site,storyUrl, authorUrl, description +titlepage_entries: series,category,genre,language,status,datePublished,dateUpdated,dateCreated,rating,warnings,numChapters,numWords,site,storyUrl, authorUrl, description ## use \r\n for line endings, the windows convention. text output only. windows_eol: true