From 5507e72f45aa0dc642d0c966bfa7777d6f782b75 Mon Sep 17 00:00:00 2001 From: Charles Haley <> Date: Sun, 9 Oct 2011 15:29:36 +0200 Subject: [PATCH 1/4] Add Amazon.fr --- src/calibre/customize/builtins.py | 11 ++ .../gui2/store/stores/amazon_fr_plugin.py | 114 ++++++++++++++++++ 2 files changed, 125 insertions(+) create mode 100644 src/calibre/gui2/store/stores/amazon_fr_plugin.py diff --git a/src/calibre/customize/builtins.py b/src/calibre/customize/builtins.py index a2c0596e0b..79e5259c00 100644 --- a/src/calibre/customize/builtins.py +++ b/src/calibre/customize/builtins.py @@ -1143,6 +1143,16 @@ class StoreAmazonDEKindleStore(StoreBase): formats = ['KINDLE'] affiliate = True +class StoreAmazonFRKindleStore(StoreBase): + name = 'Amazon FR Kindle' + author = 'Charles Haley' + description = u'Tous les ebooks Kindle' + actual_plugin = 'calibre.gui2.store.stores.amazon_fr_plugin:AmazonFRKindleStore' + + headquarters = 'DE' + formats = ['KINDLE'] + affiliate = True + class StoreAmazonUKKindleStore(StoreBase): name = 'Amazon UK Kindle' author = 'Charles Haley' @@ -1520,6 +1530,7 @@ class StoreZixoStore(StoreBase): StoreArchiveOrgStore, StoreAmazonKindleStore, StoreAmazonDEKindleStore, + StoreAmazonFRKindleStore, StoreAmazonUKKindleStore, StoreBaenWebScriptionStore, StoreBNStore, diff --git a/src/calibre/gui2/store/stores/amazon_fr_plugin.py b/src/calibre/gui2/store/stores/amazon_fr_plugin.py new file mode 100644 index 0000000000..a5b97751ca --- /dev/null +++ b/src/calibre/gui2/store/stores/amazon_fr_plugin.py @@ -0,0 +1,114 @@ +# -*- coding: utf-8 -*- + +from __future__ import (unicode_literals, division, absolute_import, print_function) + +__license__ = 'GPL 3' +__copyright__ = '2011, John Schember ' +__docformat__ = 'restructuredtext en' + +import urllib +from contextlib import closing + +from lxml import html + +from PyQt4.Qt import QUrl + +from calibre import browser +from calibre.gui2 import open_url +from calibre.gui2.store import StorePlugin +from calibre.gui2.store.search_result import SearchResult + +class AmazonFRKindleStore(StorePlugin): + ''' + For comments on the implementation, please see amazon_plugin.py + ''' + + def open(self, parent=None, detail_item=None, external=False): + aff_id = {'tag': 'charhale-21'} + store_link = 'http://www.amazon.fr/livres-kindle/b?ie=UTF8&node=695398031&ref_=sa_menu_kbo1&_encoding=UTF8&tag=%(tag)s&linkCode=ur2&camp=1642&creative=19458' % aff_id + + if detail_item: + aff_id['asin'] = detail_item + store_link = 'http://www.amazon.fr/gp/redirect.html?ie=UTF8&location=http://www.amazon.fr/dp/%(asin)s&tag=%(tag)s&linkCode=ur2&camp=1634&creative=6738' % aff_id + open_url(QUrl(store_link)) + + def search(self, query, max_results=10, timeout=60): + search_url = 'http://www.amazon.fr/s/?url=search-alias%3Ddigital-text&field-keywords=' + url = search_url + urllib.quote_plus(query) + br = browser() + + counter = max_results + with closing(br.open(url, timeout=timeout)) as f: + doc = html.fromstring(f.read()) + + data_xpath = '//div[contains(@class, "result") and contains(@class, "product")]' + format_xpath = './/span[@class="format"]/text()' + cover_xpath = './/img[@class="productImage"]/@src' + + for data in doc.xpath(data_xpath): + if counter <= 0: + break + + # Even though we are searching digital-text only Amazon will still + # put in results for non Kindle books (author pages). So we need + # to explicitly check if the item is a Kindle book and ignore it + # if it isn't. + format = ''.join(data.xpath(format_xpath)) + if 'kindle' not in format.lower(): + continue + + # We must have an asin otherwise we can't easily reference the + # book later. + asin = ''.join(data.xpath("@name")) + + cover_url = ''.join(data.xpath(cover_xpath)) + + title = ''.join(data.xpath('.//div[@class="title"]/a/text()')) + price = ''.join(data.xpath('.//div[@class="newPrice"]/span/text()')) + author = unicode(''.join(data.xpath('.//div[@class="title"]/span[@class="ptBrand"]/text()'))) + author = author.split('de ')[-1] + +# print (author, asin, cover_url, title, price) + + counter -= 1 + + s = SearchResult() + s.cover_url = cover_url.strip() + s.title = title.strip() + s.author = author.strip() + s.price = price.strip() + s.detail_item = asin.strip() + s.formats = 'Kindle' + s.DRM = SearchResult.DRM_UNKNOWN + yield s + + def get_details(self, search_result, timeout): + # We might already have been called. + if search_result.drm: + return + + url = 'http://amazon.fr/dp/' + drm_search_text = u'Simultaneous Device Usage' + drm_free_text = u'Unlimited' + + br = browser() + with closing(br.open(url + search_result.detail_item, timeout=timeout)) as nf: + idata = html.fromstring(nf.read()) + if not search_result.author: + search_result.author = ''.join(idata.xpath('//div[@class="buying" and contains(., "Author")]/a/text()')) + is_kindle = idata.xpath('boolean(//div[@class="buying"]/h1/span/span[contains(text(), "Kindle Edition")])') + if is_kindle: + search_result.formats = 'Kindle' + if idata.xpath('boolean(//div[@class="content"]//li/b[contains(text(), "' + + drm_search_text + '")])'): + if idata.xpath('boolean(//div[@class="content"]//li[contains(., "' + + drm_free_text + '") and contains(b, "' + + drm_search_text + '")])'): + search_result.drm = SearchResult.DRM_UNLOCKED + else: + search_result.drm = SearchResult.DRM_UNKNOWN + else: + search_result.drm = SearchResult.DRM_LOCKED + return True + + From 7003ae2aa991052d226ba7b4da9e55d764e9438a Mon Sep 17 00:00:00 2001 From: John Schember Date: Sun, 9 Oct 2011 10:54:19 -0400 Subject: [PATCH 2/4] Fix encoding issues when searching and displaying results for Amazon plugins. --- .../gui2/store/stores/amazon_de_plugin.py | 5 +-- .../gui2/store/stores/amazon_fr_plugin.py | 43 +++---------------- .../gui2/store/stores/amazon_plugin.py | 5 +-- .../gui2/store/stores/amazon_uk_plugin.py | 5 +-- 4 files changed, 11 insertions(+), 47 deletions(-) diff --git a/src/calibre/gui2/store/stores/amazon_de_plugin.py b/src/calibre/gui2/store/stores/amazon_de_plugin.py index 9f26e765e6..4948a48714 100644 --- a/src/calibre/gui2/store/stores/amazon_de_plugin.py +++ b/src/calibre/gui2/store/stores/amazon_de_plugin.py @@ -6,7 +6,6 @@ __copyright__ = '2011, John Schember ' __docformat__ = 'restructuredtext en' -import urllib from contextlib import closing from lxml import html @@ -37,12 +36,12 @@ def open(self, parent=None, detail_item=None, external=False): def search(self, query, max_results=10, timeout=60): search_url = 'http://www.amazon.de/s/?url=search-alias%3Ddigital-text&field-keywords=' - url = search_url + urllib.quote_plus(query) + url = search_url + query.encode('ascii', 'backslashreplace').replace('%', '%25').replace('\\x', '%').replace(' ', '+') br = browser() counter = max_results with closing(br.open(url, timeout=timeout)) as f: - doc = html.fromstring(f.read()) + doc = html.fromstring(f.read().decode('latin-1', 'replace')) # Amazon has two results pages. # 20110725: seems that is_shot is gone. diff --git a/src/calibre/gui2/store/stores/amazon_fr_plugin.py b/src/calibre/gui2/store/stores/amazon_fr_plugin.py index a5b97751ca..186ca8d4b4 100644 --- a/src/calibre/gui2/store/stores/amazon_fr_plugin.py +++ b/src/calibre/gui2/store/stores/amazon_fr_plugin.py @@ -6,7 +6,6 @@ __copyright__ = '2011, John Schember ' __docformat__ = 'restructuredtext en' -import urllib from contextlib import closing from lxml import html @@ -34,12 +33,12 @@ def open(self, parent=None, detail_item=None, external=False): def search(self, query, max_results=10, timeout=60): search_url = 'http://www.amazon.fr/s/?url=search-alias%3Ddigital-text&field-keywords=' - url = search_url + urllib.quote_plus(query) + url = search_url + query.encode('ascii', 'backslashreplace').replace('%', '%25').replace('\\x', '%').replace(' ', '+') br = browser() counter = max_results with closing(br.open(url, timeout=timeout)) as f: - doc = html.fromstring(f.read()) + doc = html.fromstring(f.read().decode('latin-1', 'replace')) data_xpath = '//div[contains(@class, "result") and contains(@class, "product")]' format_xpath = './/span[@class="format"]/text()' @@ -66,9 +65,7 @@ def search(self, query, max_results=10, timeout=60): title = ''.join(data.xpath('.//div[@class="title"]/a/text()')) price = ''.join(data.xpath('.//div[@class="newPrice"]/span/text()')) author = unicode(''.join(data.xpath('.//div[@class="title"]/span[@class="ptBrand"]/text()'))) - author = author.split('de ')[-1] - -# print (author, asin, cover_url, title, price) + author = author.split('et ')[-1] counter -= 1 @@ -79,36 +76,6 @@ def search(self, query, max_results=10, timeout=60): s.price = price.strip() s.detail_item = asin.strip() s.formats = 'Kindle' - s.DRM = SearchResult.DRM_UNKNOWN + s.drm = SearchResult.DRM_UNKNOWN + yield s - - def get_details(self, search_result, timeout): - # We might already have been called. - if search_result.drm: - return - - url = 'http://amazon.fr/dp/' - drm_search_text = u'Simultaneous Device Usage' - drm_free_text = u'Unlimited' - - br = browser() - with closing(br.open(url + search_result.detail_item, timeout=timeout)) as nf: - idata = html.fromstring(nf.read()) - if not search_result.author: - search_result.author = ''.join(idata.xpath('//div[@class="buying" and contains(., "Author")]/a/text()')) - is_kindle = idata.xpath('boolean(//div[@class="buying"]/h1/span/span[contains(text(), "Kindle Edition")])') - if is_kindle: - search_result.formats = 'Kindle' - if idata.xpath('boolean(//div[@class="content"]//li/b[contains(text(), "' + - drm_search_text + '")])'): - if idata.xpath('boolean(//div[@class="content"]//li[contains(., "' + - drm_free_text + '") and contains(b, "' + - drm_search_text + '")])'): - search_result.drm = SearchResult.DRM_UNLOCKED - else: - search_result.drm = SearchResult.DRM_UNKNOWN - else: - search_result.drm = SearchResult.DRM_LOCKED - return True - - diff --git a/src/calibre/gui2/store/stores/amazon_plugin.py b/src/calibre/gui2/store/stores/amazon_plugin.py index 693ef883fb..89a6278535 100644 --- a/src/calibre/gui2/store/stores/amazon_plugin.py +++ b/src/calibre/gui2/store/stores/amazon_plugin.py @@ -8,7 +8,6 @@ import random import re -import urllib from contextlib import closing from lxml import html @@ -122,12 +121,12 @@ def open(self, parent=None, detail_item=None, external=False): open_url(QUrl(store_link)) def search(self, query, max_results=10, timeout=60): - url = self.search_url + urllib.quote_plus(query) + url = self.search_url + query.encode('ascii', 'backslashreplace').replace('%', '%25').replace('\\x', '%').replace(' ', '+') br = browser() counter = max_results with closing(br.open(url, timeout=timeout)) as f: - doc = html.fromstring(f.read()) + doc = html.fromstring(f.read().decode('latin-1', 'replace')) # Amazon has two results pages. is_shot = doc.xpath('boolean(//div[@id="shotgunMainResults"])') diff --git a/src/calibre/gui2/store/stores/amazon_uk_plugin.py b/src/calibre/gui2/store/stores/amazon_uk_plugin.py index 86603f3fc3..3b2a4d05cc 100644 --- a/src/calibre/gui2/store/stores/amazon_uk_plugin.py +++ b/src/calibre/gui2/store/stores/amazon_uk_plugin.py @@ -6,7 +6,6 @@ __copyright__ = '2011, John Schember ' __docformat__ = 'restructuredtext en' -import urllib from contextlib import closing from lxml import html @@ -34,12 +33,12 @@ def open(self, parent=None, detail_item=None, external=False): def search(self, query, max_results=10, timeout=60): search_url = 'http://www.amazon.co.uk/s/?url=search-alias%3Ddigital-text&field-keywords=' - url = search_url + urllib.quote_plus(query) + url = search_url + query.encode('ascii', 'backslashreplace').replace('%', '%25').replace('\\x', '%').replace(' ', '+') br = browser() counter = max_results with closing(br.open(url, timeout=timeout)) as f: - doc = html.fromstring(f.read()) + doc = html.fromstring(f.read().decode('latin-1', 'replace')) # Amazon has two results pages. # 20110725: seems that is_shot is gone. From 9e114a9e1f763c8160c652eb45c1f80e85a527b5 Mon Sep 17 00:00:00 2001 From: Charles Haley <> Date: Sun, 9 Oct 2011 17:39:25 +0200 Subject: [PATCH 3/4] Clean up/fix code to strip leading "by " in German and French. --- .../gui2/store/stores/amazon_de_plugin.py | 17 ++--------------- .../gui2/store/stores/amazon_fr_plugin.py | 5 +++-- 2 files changed, 5 insertions(+), 17 deletions(-) diff --git a/src/calibre/gui2/store/stores/amazon_de_plugin.py b/src/calibre/gui2/store/stores/amazon_de_plugin.py index 4948a48714..ea92839268 100644 --- a/src/calibre/gui2/store/stores/amazon_de_plugin.py +++ b/src/calibre/gui2/store/stores/amazon_de_plugin.py @@ -43,20 +43,9 @@ def search(self, query, max_results=10, timeout=60): with closing(br.open(url, timeout=timeout)) as f: doc = html.fromstring(f.read().decode('latin-1', 'replace')) - # Amazon has two results pages. - # 20110725: seems that is_shot is gone. -# is_shot = doc.xpath('boolean(//div[@id="shotgunMainResults"])') -# # Horizontal grid of books. -# if is_shot: -# data_xpath = '//div[contains(@class, "result")]' -# format_xpath = './/div[@class="productTitle"]/text()' -# cover_xpath = './/div[@class="productTitle"]//img/@src' -# # Vertical list of books. -# else: data_xpath = '//div[contains(@class, "result") and contains(@class, "product")]' format_xpath = './/span[@class="format"]/text()' cover_xpath = './/img[@class="productImage"]/@src' -# end is_shot else for data in doc.xpath(data_xpath): if counter <= 0: @@ -79,11 +68,9 @@ def search(self, query, max_results=10, timeout=60): title = ''.join(data.xpath('.//div[@class="title"]/a/text()')) price = ''.join(data.xpath('.//div[@class="newPrice"]/span/text()')) -# if is_shot: -# author = format.split(' von ')[-1] -# else: author = ''.join(data.xpath('.//div[@class="title"]/span[@class="ptBrand"]/text()')) - author = author.split('von ')[-1] + if author.startswith('von '): + author = author[4:] counter -= 1 diff --git a/src/calibre/gui2/store/stores/amazon_fr_plugin.py b/src/calibre/gui2/store/stores/amazon_fr_plugin.py index 186ca8d4b4..ca36f1055b 100644 --- a/src/calibre/gui2/store/stores/amazon_fr_plugin.py +++ b/src/calibre/gui2/store/stores/amazon_fr_plugin.py @@ -65,7 +65,8 @@ def search(self, query, max_results=10, timeout=60): title = ''.join(data.xpath('.//div[@class="title"]/a/text()')) price = ''.join(data.xpath('.//div[@class="newPrice"]/span/text()')) author = unicode(''.join(data.xpath('.//div[@class="title"]/span[@class="ptBrand"]/text()'))) - author = author.split('et ')[-1] + if author.startswith('de '): + author = author[3:] counter -= 1 @@ -77,5 +78,5 @@ def search(self, query, max_results=10, timeout=60): s.detail_item = asin.strip() s.formats = 'Kindle' s.drm = SearchResult.DRM_UNKNOWN - + yield s From 18ed5671c699b90dbee72dd7dd2a4e426ee84148 Mon Sep 17 00:00:00 2001 From: Charles Haley <> Date: Sun, 9 Oct 2011 17:48:40 +0200 Subject: [PATCH 4/4] Fix 'by' splitting in the UK plugin. --- .../gui2/store/stores/amazon_uk_plugin.py | 17 ++--------------- 1 file changed, 2 insertions(+), 15 deletions(-) diff --git a/src/calibre/gui2/store/stores/amazon_uk_plugin.py b/src/calibre/gui2/store/stores/amazon_uk_plugin.py index 3b2a4d05cc..ef15951d50 100644 --- a/src/calibre/gui2/store/stores/amazon_uk_plugin.py +++ b/src/calibre/gui2/store/stores/amazon_uk_plugin.py @@ -40,20 +40,9 @@ def search(self, query, max_results=10, timeout=60): with closing(br.open(url, timeout=timeout)) as f: doc = html.fromstring(f.read().decode('latin-1', 'replace')) - # Amazon has two results pages. - # 20110725: seems that is_shot is gone. -# is_shot = doc.xpath('boolean(//div[@id="shotgunMainResults"])') -# # Horizontal grid of books. -# if is_shot: -# data_xpath = '//div[contains(@class, "result")]' -# format_xpath = './/div[@class="productTitle"]/text()' -# cover_xpath = './/div[@class="productTitle"]//img/@src' -# # Vertical list of books. -# else: data_xpath = '//div[contains(@class, "result") and contains(@class, "product")]' format_xpath = './/span[@class="format"]/text()' cover_xpath = './/img[@class="productImage"]/@src' -# end is_shot else for data in doc.xpath(data_xpath): if counter <= 0: @@ -76,11 +65,9 @@ def search(self, query, max_results=10, timeout=60): title = ''.join(data.xpath('.//div[@class="title"]/a/text()')) price = ''.join(data.xpath('.//div[@class="newPrice"]/span/text()')) -# if is_shot: -# author = format.split(' von ')[-1] -# else: author = ''.join(data.xpath('.//div[@class="title"]/span[@class="ptBrand"]/text()')) - author = author.split('by ')[-1] + if author.startswith('by '): + author = author[3:] counter -= 1