diff --git a/resources/recipes/180.recipe b/resources/recipes/180.recipe new file mode 100644 index 0000000000..5158bb99e0 --- /dev/null +++ b/resources/recipes/180.recipe @@ -0,0 +1,50 @@ +#!/usr/bin/env python + +__license__ = 'GPL v3' +__author__ = '2010, Gustavo Azambuja ' +''' +180.com.uy +''' + +from calibre.web.feeds.news import BasicNewsRecipe + +class Noticias(BasicNewsRecipe): + title = '180.com.uy' + __author__ = 'Gustavo Azambuja' + description = 'Noticias de Uruguay' + language = 'es' + timefmt = '[%a, %d %b, %Y]' + use_embedded_content = False + recursion = 5 + encoding = 'utf-8' + remove_javascript = True + no_stylesheets = True + + oldest_article = 2 + max_articles_per_feed = 100 + keep_only_tags = [dict(name='div', attrs={'class':'tef-md tef-md-seccion-sociedad'})] + remove_tags = [ + dict(name=['object','link']) + ] + + remove_attributes = ['width','height', 'style', 'font', 'color'] + + extra_css = ''' + h1{font-family:Geneva, Arial, Helvetica, sans-serif;color:#154B7A;} + h3{font-size: 14px;color:#999999; font-family:Geneva, Arial, Helvetica, sans-serif;font-weight: bold;} + h2{color:#666666; font-family:Geneva, Arial, Helvetica, sans-serif;font-size:small;} + p {font-family:Arial,Helvetica,sans-serif;} + ''' + feeds = [ + (u'Titulares', u'http://www.180.com.uy/feed.php') + ] + + def get_cover_url(self): + return 'http://www.180.com.uy/tplef/img/logo.gif' + + + def preprocess_html(self, soup): + for item in soup.findAll(style=True): + del item['style'] + return soup + diff --git a/resources/recipes/bitacora.recipe b/resources/recipes/bitacora.recipe new file mode 100644 index 0000000000..a36eb52988 --- /dev/null +++ b/resources/recipes/bitacora.recipe @@ -0,0 +1,58 @@ +#!/usr/bin/env python + +__license__ = 'GPL v3' +__author__ = '2010, Gustavo Azambuja ' +''' +bitacora.com.uy +''' + +from calibre.web.feeds.news import BasicNewsRecipe + +class General(BasicNewsRecipe): + title = 'bitacora.com.uy' + __author__ = 'Gustavo Azambuja' + description = 
'Noticias de Uruguay' + language = 'es' + timefmt = '[%a, %d %b, %Y]' + use_embedded_content = False + recursion = 5 + encoding = 'iso-8859-1' + remove_javascript = True + no_stylesheets = True + + oldest_article = 2 + max_articles_per_feed = 100 + keep_only_tags = [dict(id=['txt'])] + remove_tags = [ + dict(name='div', attrs={'class':'tablafoot'}), + dict(name=['object','h4']), + dict(name=['object','link']) + ] + + remove_attributes = ['width','height', 'style', 'font', 'color'] + + extra_css = ''' + h1{font-family:Geneva, Arial, Helvetica, sans-serif;color:#154B7A;} + h3{font-size: 14px;color:#999999; font-family:Geneva, Arial, Helvetica, sans-serif;font-weight: bold;} + h2{color:#666666; font-family:Geneva, Arial, Helvetica, sans-serif;font-size:small;} + p {font-family:Arial,Helvetica,sans-serif;} + ''' + feeds = [ + (u'Titulares', u'http://www.bitacora.com.uy/anxml.cgi?15') + ] + + def get_cover_url(self): + cover_url = None + index = 'http://www.bitacora.com.uy' + soup = self.index_to_soup(index) + link_item = soup.find('img',attrs={'class':'imgtapa'}) + if link_item: + cover_url = "http://www.bitacora.com.uy/"+link_item['src'] + return cover_url + + + def preprocess_html(self, soup): + for item in soup.findAll(style=True): + del item['style'] + return soup + diff --git a/resources/recipes/cosmopolitan.recipe b/resources/recipes/cosmopolitan.recipe new file mode 100644 index 0000000000..d7d3db7e0c --- /dev/null +++ b/resources/recipes/cosmopolitan.recipe @@ -0,0 +1,69 @@ +#!/usr/bin/env python + +__license__ = 'GPL v3' +__author__ = '2010, Gustavo Azambuja ' +''' +Muy Interesante +''' + +from calibre.web.feeds.news import BasicNewsRecipe + +class General(BasicNewsRecipe): + title = 'Cosmopolitan' + __author__ = 'Gustavo Azambuja' + description = 'Revista Cosmopolitan, Edicion Espanola' + language = 'es' + timefmt = '[%a, %d %b, %Y]' + use_embedded_content = False + recursion = 1 + encoding = 'utf8' + remove_javascript = True + no_stylesheets = True + 
conversion_options = {'linearize_tables': True} + + oldest_article = 180 + max_articles_per_feed = 100 + keep_only_tags = [ + dict(id=['contenido']), + dict(name='td', attrs={'class':['contentheading', 'txt_articulo']}) + ] + remove_tags = [ + dict(name='div', attrs={'class':['breadcrumb', 'bloque1', 'article', 'bajo_title', 'tags_articles', 'otrosenlaces_title', 'otrosenlaces_parent', 'compartir']}), + dict(name='div', attrs={'id':'comment'}), + dict(name='table', attrs={'class':'pagenav'}), + dict(name=['object','link']) + ] + remove_attributes = ['width','height', 'style', 'font', 'color'] + + extra_css = ''' + h1{font-family:Geneva, Arial, Helvetica, sans-serif;color:#154B7A;} + h3{font-size: 14px;color:#999999; font-family:Geneva, Arial, Helvetica, sans-serif;font-weight: bold;} + h2{color:#666666; font-family:Geneva, Arial, Helvetica, sans-serif;font-size:small;} + img {float:left; clear:both; margin:10px} + p {font-family:Arial,Helvetica,sans-serif;} + ''' + feeds = [ + (u'Articulos', u'http://feeds.feedburner.com/cosmohispano/FSSt') + ] + + def preprocess_html(self, soup): + attribs = [ 'style','font','valign' + ,'colspan','width','height' + ,'rowspan','summary','align' + ,'cellspacing','cellpadding' + ,'frames','rules','border' + ] + for item in soup.body.findAll(name=['table','td','tr','th','caption','thead','tfoot','tbody','colgroup','col']): + item.name = 'div' + for attrib in attribs: + if item.has_key(attrib): + del item[attrib] + return soup + + def get_cover_url(self): + index = 'http://www.cosmohispano.com/revista' + soup = self.index_to_soup(index) + link_item = soup.find('img',attrs={'class':'img_portada'}) + if link_item: + cover_url = "http://www.cosmohispano.com"+link_item['src'] + return cover_url diff --git a/resources/recipes/el_pais_uy.recipe b/resources/recipes/el_pais_uy.recipe new file mode 100644 index 0000000000..b474b5c232 --- /dev/null +++ b/resources/recipes/el_pais_uy.recipe @@ -0,0 +1,67 @@ +#!/usr/bin/env python + +__license__ = 
'GPL v3' +__author__ = '2010, Gustavo Azambuja ' +''' +http://www.elpais.com.uy/ +''' + +from calibre.web.feeds.news import BasicNewsRecipe + +class General(BasicNewsRecipe): + title = 'Diario El Pais' + __author__ = 'Gustavo Azambuja' + description = 'Noticias | Uruguay' + language = 'es' + timefmt = '[%a, %d %b, %Y]' + use_embedded_content = False + recursion = 2 + encoding = 'iso-8859-1' + remove_javascript = True + no_stylesheets = True + + oldest_article = 2 + max_articles_per_feed = 100 + keep_only_tags = [ + dict(name='h1'), + dict(name='div', attrs={'id':'Contenido'}) + ] + remove_tags = [ + dict(name='div', attrs={'class':['date_text', 'comments', 'form_section', 'share_it']}), + dict(name='div', attrs={'id':['relatedPosts', 'spacer', 'banner_izquierda', 'right_container']}), + dict(name='p', attrs={'class':'FacebookLikeButton'}), + dict(name=['object','form']), + dict(name=['object','table']) ] + + extra_css = ''' + h1{font-family:Geneva, Arial, Helvetica, sans-serif;color:#154B7A;} + h3{font-size: 14px;color:#999999; font-family:Geneva, Arial, Helvetica, sans-serif;font-weight: bold;} + h2{color:#666666; font-family:Geneva, Arial, Helvetica, sans-serif;font-size:small;} + p {font-family:Arial,Helvetica,sans-serif;} + ''' + feeds = [ + (u'Ultimo Momento', u'http://www.elpais.com.uy/formatos/rss/index.asp?seccion=umomento'), + (u'Editorial', u'http://www.elpais.com.uy/formatos/rss/index.asp?seccion=editorial'), + (u'Nacional', u'http://www.elpais.com.uy/formatos/rss/index.asp?seccion=nacional'), + (u'Internacional', u'http://www.elpais.com.uy/formatos/rss/index.asp?seccion=internacional'), + (u'Espectaculos', u'http://www.elpais.com.uy/formatos/rss/index.asp?seccion=espectaculos'), + (u'Deportes', u'http://www.elpais.com.uy/formatos/rss/index.asp?seccion=deportes'), + (u'Ciudades', u'http://www.elpais.com.uy/formatos/rss/index.asp?seccion=ciudades'), + (u'Economia', u'http://www.elpais.com.uy/formatos/rss/index.asp?seccion=economia') + ] + + def 
get_cover_url(self): + cover_url = None + index = 'http://www.elpais.com.uy' + soup = self.index_to_soup(index) + link_item = soup.find('div',attrs={'class':'boxmedio box257'}) + print link_item + if link_item: + cover_url = 'http://www.elpais.com.uy'+link_item.img['src'] + return cover_url + + def preprocess_html(self, soup): + for item in soup.findAll(style=True): + del item['style'] + return soup + diff --git a/resources/recipes/freeway.recipe b/resources/recipes/freeway.recipe new file mode 100644 index 0000000000..cb6d41ebb2 --- /dev/null +++ b/resources/recipes/freeway.recipe @@ -0,0 +1,100 @@ +#!/usr/bin/env python + +__license__ = 'GPL v3' +__author__ = '2010, Gustavo Azambuja ' +''' +http://freeway.com.uy +''' + +from calibre.web.feeds.news import BasicNewsRecipe + +class General(BasicNewsRecipe): + title = 'freeway.com.uy' + __author__ = 'Gustavo Azambuja' + description = 'Revista Freeway, Montevideo, Uruguay' + language = 'es' + timefmt = '[%a, %d %b, %Y]' + use_embedded_content = False + recursion = 1 + encoding = 'utf8' + remove_javascript = True + no_stylesheets = True + conversion_options = {'linearize_tables': True} + + oldest_article = 180 + max_articles_per_feed = 100 + keep_only_tags = [ + dict(id=['contenido']), + dict(name='a', attrs={'class':'titulo_art_ppal'}), + dict(name='img', attrs={'class':'recuadro'}), + dict(name='td', attrs={'class':'txt_art_ppal'}) + ] + remove_tags = [ + dict(name=['object','link']) + ] + remove_attributes = ['width','height', 'style', 'font', 'color'] + + extra_css = ''' + h1{font-family:Geneva, Arial, Helvetica, sans-serif;color:#154B7A;} + h3{font-size: 14px;color:#999999; font-family:Geneva, Arial, Helvetica, sans-serif;font-weight: bold;} + h2{color:#666666; font-family:Geneva, Arial, Helvetica, sans-serif;font-size:small;} + img {float:left; clear:both; margin:10px} + p {font-family:Arial,Helvetica,sans-serif;} + ''' + + def parse_index(self): + feeds = [] + for title, url in [('Articulos', 
'http://freeway.com.uy/revista/')]: + articles = self.art_parse_section(url) + if articles: + feeds.append((title, articles)) + return feeds + + def art_parse_section(self, url): + soup = self.index_to_soup(url) + div = soup.find(attrs={'id': 'tbl_1'}) + + current_articles = [] + for tag in div.findAllNext(attrs = {'class': 'ancho_articulos'}): + if tag.get('class') == 'link-list-heading': + break + for td in tag.findAll('td'): + a = td.find('a', attrs= {'class': 'titulo_articulos'}) + if a is None: + continue + title = self.tag_to_string(a) + url = a.get('href', False) + if not url or not title: + continue + if url.startswith('/'): + url = 'http://freeway.com.uy'+url + p = td.find('p', attrs= {'class': 'txt_articulos'}) + description = self.tag_to_string(p) + self.log('\t\tFound article:', title) + self.log('\t\t\t', url) + self.log('\t\t\t', description) + current_articles.append({'title': title, 'url': url, 'description':description, 'date':''}) + + return current_articles + + def preprocess_html(self, soup): + attribs = [ 'style','font','valign' + ,'colspan','width','height' + ,'rowspan','summary','align' + ,'cellspacing','cellpadding' + ,'frames','rules','border' + ] + for item in soup.body.findAll(name=['table','td','tr','th','caption','thead','tfoot','tbody','colgroup','col']): + item.name = 'div' + for attrib in attribs: + if item.has_key(attrib): + del item[attrib] + return soup + + def get_cover_url(self): + #index = 'http://www.cosmohispano.com/revista' + #soup = self.index_to_soup(index) + #link_item = soup.find('img',attrs={'class':'img_portada'}) + #if link_item: + # cover_url = "http://www.cosmohispano.com"+link_item['src'] + return 'http://freeway.com.uy/_upload/_n_foto_grande/noticia_1792_tapanoviembre2010.jpg' diff --git a/resources/recipes/la_diaria.recipe b/resources/recipes/la_diaria.recipe new file mode 100644 index 0000000000..d89eb465dd --- /dev/null +++ b/resources/recipes/la_diaria.recipe @@ -0,0 +1,48 @@ +#!/usr/bin/env python + 
+__license__ = 'GPL v3' +__author__ = '2010, Gustavo Azambuja ' +''' +ladiaria.com.uy +''' + +from calibre.web.feeds.news import BasicNewsRecipe + +class General(BasicNewsRecipe): + title = 'La Diaria' + __author__ = 'Gustavo Azambuja' + description = 'Noticias de Uruguay' + language = 'es' + timefmt = '[%a, %d %b, %Y]' + use_embedded_content = False + recursion = 5 + encoding = 'utf8' + remove_javascript = True + no_stylesheets = True + + oldest_article = 2 + max_articles_per_feed = 100 + keep_only_tags = [dict(id=['article'])] + remove_tags = [ + dict(name='div', attrs={'class':['byline', 'hr', 'titlebar', 'volver-arriba-right']}), + dict(name='div', attrs={'id':'discussion'}), + dict(name=['object','link']) + ] + + extra_css = ''' + h1{font-family:Geneva, Arial, Helvetica, sans-serif;color:#154B7A;} + h3{font-size: 14px;color:#999999; font-family:Geneva, Arial, Helvetica, sans-serif;font-weight: bold;} + h2{color:#666666; font-family:Geneva, Arial, Helvetica, sans-serif;font-size:small;} + p {font-family:Arial,Helvetica,sans-serif;} + ''' + feeds = [ + (u'Articulos', u'http://ladiaria.com/feeds/articulos') + ] + + def get_cover_url(self): + return 'http://ladiaria.com/edicion/imagenportada/' + + def preprocess_html(self, soup): + for item in soup.findAll(style=True): + del item['style'] + return soup diff --git a/resources/recipes/la_razon_bo.recipe b/resources/recipes/la_razon_bo.recipe index b7cd59d043..18a00d6763 100644 --- a/resources/recipes/la_razon_bo.recipe +++ b/resources/recipes/la_razon_bo.recipe @@ -8,7 +8,7 @@ from calibre.web.feeds.news import BasicNewsRecipe class LaRazon_Bol(BasicNewsRecipe): - title = 'La Razón - Bolivia' + title = u'La Razón - Bolivia' __author__ = 'Darko Miletic' description = 'El diario nacional de Bolivia' publisher = 'Praxsis S.R.L.' 
diff --git a/resources/recipes/montevideo_com.recipe b/resources/recipes/montevideo_com.recipe new file mode 100644 index 0000000000..cabd4181d6 --- /dev/null +++ b/resources/recipes/montevideo_com.recipe @@ -0,0 +1,56 @@ +#!/usr/bin/env python + +__license__ = 'GPL v3' +__author__ = '2010, Gustavo Azambuja ' +''' +http://www.montevideo.com.uy +''' + +from calibre.web.feeds.news import BasicNewsRecipe + +class Noticias(BasicNewsRecipe): + title = 'Montevideo COMM' + __author__ = 'Gustavo Azambuja' + description = 'Noticias de Uruguay' + language = 'es' + timefmt = '[%a, %d %b, %Y]' + use_embedded_content = False + recursion = 5 + encoding = 'utf-8' + remove_javascript = True + no_stylesheets = True + + oldest_article = 2 + max_articles_per_feed = 100 + keep_only_tags = [dict(id=['txt'])] + remove_tags = [ + dict(name=['object','link']) + ] + + remove_attributes = ['width','height', 'style', 'font', 'color'] + + extra_css = ''' + h1{font-family:Geneva, Arial, Helvetica, sans-serif;color:#154B7A;} + h3{font-size: 14px;color:#999999; font-family:Geneva, Arial, Helvetica, sans-serif;font-weight: bold;} + h2{color:#666666; font-family:Geneva, Arial, Helvetica, sans-serif;font-size:small;} + p {font-family:Arial,Helvetica,sans-serif;} + ''' + feeds = [ + (u'Destacados', u'http://www.montevideo.com.uy/anxml.aspx?58'), + (u'Noticias', u'http://www.montevideo.com.uy/anxml.aspx?59'), + (u'Tecnologia', u'http://www.montevideo.com.uy/anxml.aspx?133'), + (u'Tiempo Libre', u'http://www.montevideo.com.uy/anxml.aspx?60'), + # (u'Deportes', u'http://www.montevideo.com.uy/anxml.aspx?968'), + # (u'Pantallazo', u'http://www.montevideo.com.uy/anxml.aspx?1022'), + (u'Gastronomia', u'http://www.montevideo.com.uy/anxml.aspx?1023') + ] + + def get_cover_url(self): + return 'http://sphotos.ak.fbcdn.net/hphotos-ak-snc1/hs276.snc1/10319_147339559330_147337559330_2625816_6636564_n.jpg' + + + def preprocess_html(self, soup): + for item in soup.findAll(style=True): + del item['style'] + return 
soup + diff --git a/resources/recipes/observa_digital.recipe b/resources/recipes/observa_digital.recipe new file mode 100644 index 0000000000..375d67236c --- /dev/null +++ b/resources/recipes/observa_digital.recipe @@ -0,0 +1,63 @@ +#!/usr/bin/env python + +__license__ = 'GPL v3' +__author__ = '2010, Gustavo Azambuja ' +''' +observa.com.uy +''' + +from calibre.web.feeds.news import BasicNewsRecipe + +class Noticias(BasicNewsRecipe): + title = 'Observa Digital' + __author__ = '2010, Gustavo Azambuja ' + description = 'Noticias desde Uruguay' + language = 'es' + timefmt = '[%a, %d %b, %Y]' + use_embedded_content = False + recursion = 5 + encoding = 'utf8' + remove_javascript = True + no_stylesheets = True + + oldest_article = 2 + max_articles_per_feed = 100 + keep_only_tags = [dict(id=['contenido'])] + remove_tags = [ + dict(name='div', attrs={'id':'contenedorVinculadas'}), + dict(name='p', attrs={'id':'nota_firma'}), + dict(name=['object','link']) + ] + + remove_attributes = ['width','height', 'style', 'font', 'color'] + + extra_css = ''' + h1{font-family:Geneva, Arial, Helvetica, sans-serif;color:#154B7A;} + h3{font-size: 14px;color:#999999; font-family:Geneva, Arial, Helvetica, sans-serif;font-weight: bold;} + h2{color:#666666; font-family:Geneva, Arial, Helvetica, sans-serif;font-size:small;} + p {font-family:Arial,Helvetica,sans-serif;} + ''' + feeds = [ + (u'Actualidad', u'http://www.observa.com.uy/RSS/actualidad.xml'), + (u'Deportes', u'http://www.observa.com.uy/RSS/deportes.xml'), + (u'Vida', u'http://www.observa.com.uy/RSS/vida.xml'), + (u'Ciencia y Tecnologia', u'http://www.observa.com.uy/RSS/ciencia.xml') + ] + + def get_cover_url(self): + cover_url = None + index = 'http://www.elobservador.com.uy/elobservador/nav_portada.asp?suplemento=dia' + soup = self.index_to_soup(index) + link_item = soup.find('img',attrs={'usemap':'#mapeo_imagenes'}) + if link_item: + cover_url = 'http://www.elobservador.com.uy'+link_item['src'].strip() + + print cover_url + + 
return cover_url + + + def preprocess_html(self, soup): + for item in soup.findAll(style=True): + del item['style'] + return soup diff --git a/resources/recipes/revista_bla.recipe b/resources/recipes/revista_bla.recipe new file mode 100644 index 0000000000..15c7e7fb3f --- /dev/null +++ b/resources/recipes/revista_bla.recipe @@ -0,0 +1,54 @@ +#!/usr/bin/env python + +__license__ = 'GPL v3' +__author__ = '2010, Gustavo Azambuja ' +''' +http://www.revistabla.com +''' + +from calibre.web.feeds.news import BasicNewsRecipe + +class Noticias(BasicNewsRecipe): + title = 'Revista Bla' + __author__ = 'Gustavo Azambuja' + description = 'Moda | Uruguay' + language = 'es' + timefmt = '[%a, %d %b, %Y]' + use_embedded_content = False + recursion = 5 + encoding = 'utf8' + remove_javascript = True + no_stylesheets = True + + oldest_article = 20 + max_articles_per_feed = 100 + keep_only_tags = [dict(id=['body_container'])] + remove_tags = [ + dict(name='div', attrs={'class':['date_text', 'comments', 'form_section', 'share_it']}), + dict(name='div', attrs={'id':['relatedPosts', 'spacer', 'banner_izquierda', 'right_container']}), + dict(name='p', attrs={'class':'FacebookLikeButton'}), + dict(name=['object','link']) ] + + extra_css = ''' + h1{font-family:Geneva, Arial, Helvetica, sans-serif;color:#154B7A;} + h3{font-size: 14px;color:#999999; font-family:Geneva, Arial, Helvetica, sans-serif;font-weight: bold;} + h2{color:#666666; font-family:Geneva, Arial, Helvetica, sans-serif;font-size:small;} + p {font-family:Arial,Helvetica,sans-serif;} + ''' + feeds = [ + (u'Articulos', u'http://www.revistabla.com/feed/') + ] + + def get_cover_url(self): + cover_url = None + index = 'http://www.revistabla.com' + soup = self.index_to_soup(index) + link_item = soup.find('div',attrs={'class':'header_right'}) + if link_item: + cover_url = link_item.img['src'] + return cover_url + + def preprocess_html(self, soup): + for item in soup.findAll(style=True): + del item['style'] + return soup diff --git 
a/resources/recipes/revista_muy.recipe b/resources/recipes/revista_muy.recipe index e452a6f053..b101fe97ce 100644 --- a/resources/recipes/revista_muy.recipe +++ b/resources/recipes/revista_muy.recipe @@ -108,3 +108,10 @@ def parse_index(self): feeds.append((title, articles)) return feeds + def get_cover_url(self): + index = 'http://www.muyinteresante.es/revista' + soup = self.index_to_soup(index) + link_item = soup.find('img',attrs={'class':'img_portada'}) + if link_item: + cover_url = "http://www.muyinteresante.es"+link_item['src'] + return cover_url diff --git a/resources/recipes/telepolis.recipe b/resources/recipes/telepolis.recipe index 1009dca275..4ca57f8275 100644 --- a/resources/recipes/telepolis.recipe +++ b/resources/recipes/telepolis.recipe @@ -3,12 +3,12 @@ __license__ = 'GPL v3' __copyright__ = '2009, Gerhard Aigner ' -''' http://www.derstandard.at - Austrian Newspaper ''' + import re from calibre.web.feeds.news import BasicNewsRecipe class TelepolisNews(BasicNewsRecipe): - title = u'Telepolis (News)' + title = u'Telepolis (News+Artikel)' __author__ = 'Gerhard Aigner' publisher = 'Heise Zeitschriften Verlag GmbH & Co KG' description = 'News from telepolis' @@ -20,16 +20,16 @@ class TelepolisNews(BasicNewsRecipe): encoding = "utf-8" language = 'de_AT' - use_embedded_content = False + use_embedded_content =False remove_empty_feeds = True preprocess_regexps = [(re.compile(r']*>', re.DOTALL|re.IGNORECASE), lambda match: ''), (re.compile(r'', re.DOTALL|re.IGNORECASE), lambda match: ''),] - keep_only_tags = [dict(name = 'table',attrs={'class':'blogtable'})] - remove_tags = [dict(name='img'), dict(name='td',attrs={'class':'blogbottom'})] + keep_only_tags = [dict(name = 'td',attrs={'class':'bloghead'}),dict(name = 'td',attrs={'class':'blogfliess'})] + remove_tags = [dict(name='img'), dict(name='td',attrs={'class':'blogbottom'}), dict(name='td',attrs={'class':'forum'})] - feeds = [(u'News', u'http://www.heise.de/tp/news.rdf')] + feeds = [(u'News', 
u'http://www.heise.de/tp/news-atom.xml')] html2lrf_options = [ '--comment' , description @@ -41,7 +41,7 @@ class TelepolisNews(BasicNewsRecipe): def get_article_url(self, article): '''if the linked article is of kind artikel don't take it''' - if (article.link.count('artikel') > 0) : + if (article.link.count('artikel') > 1) : return None return article.link @@ -49,3 +49,5 @@ def preprocess_html(self, soup): mtag = '' soup.head.insert(0,mtag) return soup + + diff --git a/setup/installer/windows/freeze.py b/setup/installer/windows/freeze.py index dc3dd33604..118b6690f0 100644 --- a/setup/installer/windows/freeze.py +++ b/setup/installer/windows/freeze.py @@ -132,7 +132,7 @@ def ignore_lib(root, items): shutil.copytree(self.j(comext, 'shell'), self.j(sp_dir, 'win32com', 'shell')) shutil.rmtree(comext) - for pat in (r'numpy', r'PyQt4\uic\port_v3'): + for pat in (r'PyQt4\uic\port_v3', ): x = glob.glob(self.j(self.lib_dir, 'site-packages', pat))[0] shutil.rmtree(x) diff --git a/setup/installer/windows/notes.rst b/setup/installer/windows/notes.rst index 281cd8668e..45aa4d2afb 100644 --- a/setup/installer/windows/notes.rst +++ b/setup/installer/windows/notes.rst @@ -19,7 +19,7 @@ Set CMAKE_PREFIX_PATH environment variable to C:\cygwin\home\kovid\sw This is where all dependencies will be installed. 
-Add C:\Python26\Scripts and C:\Python26 to PATH +Add C:\Python27\Scripts and C:\Python27 to PATH Install setuptools from http://pypi.python.org/pypi/setuptools If there are no windows binaries already compiled for the version of python you are using then download the source and run the following command in the folder where the source has been unpacked:: @@ -28,7 +28,7 @@ If there are no windows binaries already compiled for the version of python you Run the following command to install python dependencies:: - easy_install --always-unzip -U ipython mechanize pyreadline python-dateutil dnspython cssutils clientform + easy_install --always-unzip -U ipython mechanize pyreadline python-dateutil dnspython cssutils clientform pycrypto Install BeautifulSoup 3.0.x manually into site-packages (3.1.x parses broken HTML very poorly) diff --git a/src/calibre/devices/kobo/driver.py b/src/calibre/devices/kobo/driver.py index 934dc0879e..174441c276 100644 --- a/src/calibre/devices/kobo/driver.py +++ b/src/calibre/devices/kobo/driver.py @@ -229,6 +229,10 @@ def delete_via_sql(self, ContentID, ContentType): #Delete the volume_shortcovers second cursor.execute('delete from volume_shortcovers where volumeid = ?', t) + # Delete the rows from content_keys + if self.dbversion >= 8: + cursor.execute('delete from content_keys where volumeid = ?', t) + # Delete the chapters associated with the book next t = (ContentID,ContentID,) cursor.execute('delete from content where BookID = ? 
or ContentID = ?', t) diff --git a/src/calibre/ebooks/metadata/fetch.py b/src/calibre/ebooks/metadata/fetch.py index b6b3fb9c40..b797a477d6 100644 --- a/src/calibre/ebooks/metadata/fetch.py +++ b/src/calibre/ebooks/metadata/fetch.py @@ -145,18 +145,21 @@ def config_widget(self): setattr(w, '_'+x, cb) cb.setChecked(c.get(x, True)) w._layout.addWidget(cb) - - cb = QCheckBox(_('Convert comments downloaded from %s to plain text')%(self.name)) - setattr(w, '_textcomments', cb) - cb.setChecked(c.get('textcomments', False)) - w._layout.addWidget(cb) + + if self.has_html_comments: + cb = QCheckBox(_('Convert comments downloaded from %s to plain text')%(self.name)) + setattr(w, '_textcomments', cb) + cb.setChecked(c.get('textcomments', False)) + w._layout.addWidget(cb) return w def save_settings(self, w): dl_settings = {} - for x in ('rating', 'tags', 'comments', 'textcomments'): + for x in ('rating', 'tags', 'comments'): dl_settings[x] = getattr(w, '_'+x).isChecked() + if self.has_html_comments: + dl_settings['textcomments'] = getattr(w, '_textcomments').isChecked() c = self.config_store() c.set(self.name, dl_settings) if hasattr(w, '_sc'): diff --git a/src/calibre/ebooks/metadata/isbndb.py b/src/calibre/ebooks/metadata/isbndb.py index 83cf6ee0ed..9169227326 100644 --- a/src/calibre/ebooks/metadata/isbndb.py +++ b/src/calibre/ebooks/metadata/isbndb.py @@ -90,10 +90,8 @@ def build_isbn(base_url, opts): return base_url + 'index1=isbn&value1='+opts.isbn def build_combined(base_url, opts): - query = '' - for e in (opts.title, opts.author, opts.publisher): - if e is not None: - query += ' ' + e + query = ' '.join([e for e in (opts.title, opts.author, opts.publisher) \ + if e is not None ]) query = query.strip() if len(query) == 0: raise ISBNDBError('You must specify at least one of --author, --title or --publisher') @@ -141,15 +139,8 @@ def create_books(opts, args, timeout=5.): print ('ISBNDB query: '+url) tans = [ISBNDBMetadata(book) for book in fetch_metadata(url, 
timeout=timeout)] - ans = [] - for x in tans: - add = True - for y in ans: - if y.isbn == x.isbn: - add = False - if add: - ans.append(x) - return ans + #remove duplicates ISBN + return list(dict((book.isbn, book) for book in tans).values()) def main(args=sys.argv): parser = option_parser() diff --git a/src/calibre/library/__init__.py b/src/calibre/library/__init__.py index 0f8e5e5496..8ff23c0a0a 100644 --- a/src/calibre/library/__init__.py +++ b/src/calibre/library/__init__.py @@ -6,3 +6,53 @@ def db(path=None): from calibre.library.database2 import LibraryDatabase2 from calibre.utils.config import prefs return LibraryDatabase2(path if path else prefs['library_path']) + + +def generate_test_db(library_path, + num_of_records=20000, + num_of_authors=6000, + num_of_tags=10000, + tag_length=7, + author_length=7, + title_length=10, + max_authors=10, + max_tags=10 + ): + import random, string, os, sys, time + + if not os.path.exists(library_path): + os.makedirs(library_path) + + def randstr(length): + return ''.join(random.choice(string.letters) for i in + xrange(length)) + + all_tags = [randstr(tag_length) for j in xrange(num_of_tags)] + print 'Generated', num_of_tags, 'tags' + all_authors = [randstr(author_length) for j in xrange(num_of_authors)] + print 'Generated', num_of_authors, 'authors' + all_titles = [randstr(title_length) for j in xrange(num_of_records)] + print 'Generated', num_of_records, 'titles' + + testdb = db(library_path) + + print 'Creating', num_of_records, 'records...' 
+ + start = time.time() + + for i, title in enumerate(all_titles): + print i+1, + sys.stdout.flush() + authors = random.randint(1, max_authors) + authors = [random.choice(all_authors) for i in xrange(authors)] + tags = random.randint(0, max_tags) + tags = [random.choice(all_tags) for i in xrange(tags)] + from calibre.ebooks.metadata.book.base import Metadata + mi = Metadata(title, authors) + mi.tags = tags + testdb.import_book(mi, []) + + t = time.time() - start + print '\nGenerated', num_of_records, 'records in:', t, 'seconds' + print 'Time per record:', t/float(num_of_records) + diff --git a/src/calibre/library/catalog.py b/src/calibre/library/catalog.py index 19519d6d71..33525f6540 100644 --- a/src/calibre/library/catalog.py +++ b/src/calibre/library/catalog.py @@ -278,10 +278,10 @@ def run(self, path_to_output, opts, db, notification=DummyReporter()): from calibre.library.save_to_disk import preprocess_template #Bibtex functions - from calibre.utils.bibtex import bibtex_author_format, utf8ToBibtex, ValidateCitationKey + from calibre.utils.bibtex import BibTeX def create_bibtex_entry(entry, fields, mode, template_citation, - asccii_bibtex = True, citation_bibtex = True): + bibtexdict, citation_bibtex = True): #Bibtex doesn't like UTF-8 but keep unicode until writing #Define starting chain or if book valid strict and not book return a Fail string @@ -297,7 +297,8 @@ def create_bibtex_entry(entry, fields, mode, template_citation, if citation_bibtex : # Citation tag - bibtex_entry.append(make_bibtex_citation(entry, template_citation, asccii_bibtex)) + bibtex_entry.append(make_bibtex_citation(entry, template_citation, + bibtexdict)) bibtex_entry = [u' '.join(bibtex_entry)] for field in fields: @@ -312,11 +313,11 @@ def create_bibtex_entry(entry, fields, mode, template_citation, pass if field == 'authors' : - bibtex_entry.append(u'author = "%s"' % bibtex_author_format(item)) + bibtex_entry.append(u'author = "%s"' % bibtexdict.bibtex_author_format(item)) elif field in 
['title', 'publisher', 'cover', 'uuid', 'author_sort', 'series'] : - bibtex_entry.append(u'%s = "%s"' % (field, utf8ToBibtex(item, asccii_bibtex))) + bibtex_entry.append(u'%s = "%s"' % (field, bibtexdict.utf8ToBibtex(item))) elif field == 'id' : bibtex_entry.append(u'calibreid = "%s"' % int(item)) @@ -329,13 +330,13 @@ def create_bibtex_entry(entry, fields, mode, template_citation, elif field == 'tags' : #A list to flatten - bibtex_entry.append(u'tags = "%s"' % utf8ToBibtex(u', '.join(item), asccii_bibtex)) + bibtex_entry.append(u'tags = "%s"' % bibtexdict.utf8ToBibtex(u', '.join(item))) elif field == 'comments' : #\n removal item = item.replace(u'\r\n',u' ') item = item.replace(u'\n',u' ') - bibtex_entry.append(u'note = "%s"' % utf8ToBibtex(item, asccii_bibtex)) + bibtex_entry.append(u'note = "%s"' % bibtexdict.utf8ToBibtex(item)) elif field == 'isbn' : # Could be 9, 10 or 13 digits @@ -353,8 +354,7 @@ def create_bibtex_entry(entry, fields, mode, template_citation, elif field == 'pubdate' : bibtex_entry.append(u'year = "%s"' % item.year) - bibtex_entry.append(u'month = "%s"' % utf8ToBibtex(strftime("%b", item), - asccii_bibtex)) + bibtex_entry.append(u'month = "%s"' % bibtexdict.utf8ToBibtex(strftime("%b", item))) bibtex_entry = u',\n '.join(bibtex_entry) bibtex_entry += u' }\n\n' @@ -371,7 +371,7 @@ def check_entry_book_valid(entry): else : return True - def make_bibtex_citation(entry, template_citation, asccii_bibtex): + def make_bibtex_citation(entry, template_citation, bibtexclass): #define a function to replace the template entry by its value def tpl_replace(objtplname) : @@ -392,8 +392,9 @@ def tpl_replace(objtplname) : return u'' if len(template_citation) >0 : - tpl_citation = utf8ToBibtex(ValidateCitationKey(re.sub(u'\{[^{}]*\}', - tpl_replace, template_citation)), asccii_bibtex) + tpl_citation = bibtexclass.utf8ToBibtex( + bibtexclass.ValidateCitationKey(re.sub(u'\{[^{}]*\}', + tpl_replace, template_citation))) if len(tpl_citation) >0 : return 
tpl_citation @@ -404,10 +405,7 @@ def tpl_replace(objtplname) : else : template_citation = u'%s' % str(entry["id"]) - if asccii_bibtex : - return ValidateCitationKey(template_citation.encode('ascii', 'replace')) - else : - return ValidateCitationKey(template_citation) + return bibtexclass.ValidateCitationKey(template_citation) self.fmt = path_to_output.rpartition('.')[2] self.notification = notification @@ -475,13 +473,16 @@ def tpl_replace(objtplname) : if not len(data): log.error("\nNo matching database entries for search criteria '%s'" % opts.search_text) + #Initialize BibTeX class + bibtexc = BibTeX() + #Entries writing after Bibtex formating (or not) if bibfile_enc != 'ascii' : - asccii_bibtex = False + bibtexc.ascii_bibtex = False else : - asccii_bibtex = True + bibtexc.ascii_bibtex = True - #Check and go to default in case of bad CLI + #Check citation choice and go to default in case of bad CLI if isinstance(opts.impcit, (StringType, UnicodeType)) : if opts.impcit == 'False' : citation_bibtex= False @@ -493,6 +494,7 @@ def tpl_replace(objtplname) : else : citation_bibtex= opts.impcit + #Preprocess for error and light correction template_citation = preprocess_template(opts.bib_cit) #Open output and write entries @@ -514,7 +516,7 @@ def tpl_replace(objtplname) : for entry in data: outfile.write(create_bibtex_entry(entry, fields, bib_entry, template_citation, - asccii_bibtex, citation_bibtex)) + bibtexc, citation_bibtex)) outfile.close() diff --git a/src/calibre/library/database2.py b/src/calibre/library/database2.py index 8e7002097a..21a54a4dd6 100644 --- a/src/calibre/library/database2.py +++ b/src/calibre/library/database2.py @@ -1248,15 +1248,20 @@ def doit(func, *args, **kwargs): traceback.print_exc() else: raise + path_changed = False if set_title and mi.title: - self.set_title(id, mi.title, commit=False) + self._set_title(id, mi.title) + path_changed = True if set_authors: if not mi.authors: mi.authors = [_('Unknown')] authors = [] for a in mi.authors: 
authors += string_to_authors(a) - self.set_authors(id, authors, notify=False, commit=False) + self._set_authors(id, authors) + path_changed = True + if path_changed: + self.set_path(id, index_is_id=True) if mi.author_sort: doit(self.set_author_sort, id, mi.author_sort, notify=False, commit=False) @@ -1348,13 +1353,7 @@ def author_sort_from_authors(self, authors): result.append(r) return ' & '.join(result).replace('|', ',') - def set_authors(self, id, authors, notify=True, commit=True): - ''' - Note that even if commit is False, the db will still be committed to - because this causes the location of files to change - - :param authors: A list of authors. - ''' + def _set_authors(self, id, authors): if not authors: authors = [_('Unknown')] self.conn.execute('DELETE FROM books_authors_link WHERE book=?',(id,)) @@ -1379,25 +1378,30 @@ def set_authors(self, id, authors, notify=True, commit=True): ss = self.author_sort_from_book(id, index_is_id=True) self.conn.execute('UPDATE books SET author_sort=? WHERE id=?', (ss, id)) - self.dirtied([id], commit=False) - if commit: - self.conn.commit() self.data.set(id, self.FIELD_MAP['authors'], ','.join([a.replace(',', '|') for a in authors]), row_is_id=True) self.data.set(id, self.FIELD_MAP['author_sort'], ss, row_is_id=True) + + def set_authors(self, id, authors, notify=True, commit=True): + ''' + Note that even if commit is False, the db will still be committed to + because this causes the location of files to change + + :param authors: A list of authors. 
+ ''' + self._set_authors(id, authors) + self.dirtied([id], commit=False) + if commit: + self.conn.commit() self.set_path(id, index_is_id=True) if notify: self.notify('metadata', [id]) - def set_title(self, id, title, notify=True, commit=True): - ''' - Note that even if commit is False, the db will still be committed to - because this causes the location of files to change - ''' + def _set_title(self, id, title): if not title: - return - if not isinstance(title, unicode): + return False + if isbytestring(title): title = title.decode(preferred_encoding, 'replace') self.conn.execute('UPDATE books SET title=? WHERE id=?', (title, id)) self.data.set(id, self.FIELD_MAP['title'], title, row_is_id=True) @@ -1405,6 +1409,15 @@ def set_title(self, id, title, notify=True, commit=True): self.data.set(id, self.FIELD_MAP['sort'], title_sort(title), row_is_id=True) else: self.data.set(id, self.FIELD_MAP['sort'], title, row_is_id=True) + return True + + def set_title(self, id, title, notify=True, commit=True): + ''' + Note that even if commit is False, the db will still be committed to + because this causes the location of files to change + ''' + if not self._set_title(id, title): + return self.set_path(id, index_is_id=True) self.dirtied([id], commit=False) if commit: @@ -2072,13 +2085,11 @@ def create_book_entry(self, mi, cover=None, add_duplicates=True, (id, title, series_index, aus)) self.data.books_added([id], self) - self.set_path(id, True) - self.conn.commit() if mi.timestamp is None: mi.timestamp = utcnow() if mi.pubdate is None: mi.pubdate = utcnow() - self.set_metadata(id, mi, ignore_errors=True) + self.set_metadata(id, mi, ignore_errors=True, commit=True) if cover is not None: try: self.set_cover(id, cover) @@ -2114,13 +2125,11 @@ def add_books(self, paths, formats, metadata, add_duplicates=True): id = obj.lastrowid self.data.books_added([id], self) ids.append(id) - self.set_path(id, True) - self.conn.commit() if mi.timestamp is None: mi.timestamp = utcnow() if 
mi.pubdate is None: mi.pubdate = utcnow() - self.set_metadata(id, mi) + self.set_metadata(id, mi, commit=True, ignore_errors=True) npath = self.run_import_plugins(path, format) format = os.path.splitext(npath)[-1].lower().replace('.', '').upper() stream = lopen(npath, 'rb') @@ -2154,12 +2163,11 @@ def import_book(self, mi, formats, notify=True, import_hooks=True, (title, series_index, aus)) id = obj.lastrowid self.data.books_added([id], self) - self.set_path(id, True) if mi.timestamp is None: mi.timestamp = utcnow() if mi.pubdate is None: mi.pubdate = utcnow() - self.set_metadata(id, mi, ignore_errors=True) + self.set_metadata(id, mi, ignore_errors=True, commit=True) if preserve_uuid and mi.uuid: self.set_uuid(id, mi.uuid, commit=False) for path in formats: diff --git a/src/calibre/startup.py b/src/calibre/startup.py index e384153993..e74660d0bc 100644 --- a/src/calibre/startup.py +++ b/src/calibre/startup.py @@ -129,7 +129,7 @@ def __init__(self, name, fobject): def __getattribute__(self, attr): if attr in ('name', '__enter__', '__str__', '__unicode__', - '__repr__'): + '__repr__', '__exit__'): return object.__getattribute__(self, attr) fobject = object.__getattribute__(self, 'fobject') return getattr(fobject, attr) @@ -155,6 +155,11 @@ def __enter__(self): fobject.__enter__() return self + def __exit__(self, *args): + fobject = object.__getattribute__(self, 'fobject') + return fobject.__exit__(*args) + + m = mode[0] random = len(mode) > 1 and mode[1] == '+' binary = mode[-1] == 'b' diff --git a/src/calibre/utils/bibtex.py b/src/calibre/utils/bibtex.py index 1328aa9157..d19a6b05fe 100644 --- a/src/calibre/utils/bibtex.py +++ b/src/calibre/utils/bibtex.py @@ -1,5 +1,3 @@ -# -*- coding: utf-8 -*- - """ Collection of python utility-methodes commonly used by other bibliograph packages. From http://pypi.python.org/pypi/bibliograph.core/ @@ -62,11 +60,14 @@ DAMAGE. 
""" -__docformat__ = 'reStructuredText' __author__ = 'sengian ' +__docformat__ = 'restructuredtext en' import re, string +from calibre.constants import preferred_encoding +from calibre.utils.mreplace import MReplace + utf8enc2latex_mapping = { # This is a mapping of Unicode characters to LaTeX equivalents. # The information has been extracted from @@ -2463,7 +2464,7 @@ u'\U0001d7fd': '$\\mathtt{7}$', u'\U0001d7fe': '$\\mathtt{8}$', u'\U0001d7ff': '$\\mathtt{9}$', - + #Items from simple list u'\u0106': "{\\a\\'C}", u'\u0408': '{\\CYRJE}', @@ -2842,69 +2843,66 @@ '"':'{"}', } -def ValidateCitationKey(text): - """ - removes characters not allowed in BibTeX keys +class BibTeX: + def __init__(self): + self.rep_utf8 = MReplace(utf8enc2latex_mapping) + self.rep_ent = MReplace(entity_mapping) + #Set default conversion to ASCII BibTeX + self.ascii_bibtex = True + # This substitution is based on the description of cite key restrictions at + # http://bibdesk.sourceforge.net/manual/BibDesk%20Help_2.html + self.invalid_cit = re.compile(u'[ "@\',\\#}{~%&$^]') + self.upper = re.compile(u'[' + + string.uppercase.decode(preferred_encoding) + u']') + self.escape = re.compile(u'[~#&%_]') - >>> from bibliograph.core.utils import _validKey - >>> _validKey(DummyEntry('Foo Bar')) - 'FooBar' + def ValidateCitationKey(self, text): + """ + removes characters not allowed in BibTeX keys + >>> ValidateCitationKey(DummyEntry('my@id')) + 'myid' + """ + return self.invalid_cit.sub(u'', text) - >>> _validKey(DummyEntry('my@id')) - 'myid' + def braceUppercase(self, text): + """ Convert uppercase letters to bibtex encoded uppercase + >>> braceUppercase('Foo Bar') + '{F}oo {B}ar' + """ + return self.upper.sub(lambda m: u'{%s}' % m.group(), text) - """ - # This substitution is based on the description of cite key restrictions at - # http://bibdesk.sourceforge.net/manual/BibDesk%20Help_2.html - return re.sub(u'[ "@\',\\#}{~%&$^]', u'', text) + def resolveEntities(self, text): + #for entity, entity_map 
class BibTeX(object):
    """
    Convert unicode metadata into text that can be written to a BibTeX file.

    Set ``ascii_bibtex`` to False to leave non ASCII characters untouched
    (unofficial UTF-8 BibTeX) instead of translating them to LaTeX macros.
    """

    # Characters that are not allowed in a citation key. Based on the
    # description of cite key restrictions at
    # http://bibdesk.sourceforge.net/manual/BibDesk%20Help_2.html
    # The backslash is escaped for python AND for the regex engine so that it
    # really is a member of the set (a lone '\\#' only ever matched '#').
    INVALID_CIT_PATTERN = u'[ "@\',\\\\#}{~%&$^]'
    # Characters that only need a leading backslash inside a field value
    ESCAPE_PATTERN = u'[~#&%_]'

    def __init__(self):
        # Single pass replacers built once from the module level mappings
        self.rep_utf8 = MReplace(utf8enc2latex_mapping)
        self.rep_ent = MReplace(entity_mapping)
        #Set default conversion to ASCII BibTeX
        self.ascii_bibtex = True
        self.invalid_cit = re.compile(self.INVALID_CIT_PATTERN)
        self.upper = re.compile(u'[' +
            string.uppercase.decode(preferred_encoding) + u']')
        self.escape = re.compile(self.ESCAPE_PATTERN)

    def ValidateCitationKey(self, text):
        """
        Return text without the characters not allowed in BibTeX keys
        ('my@id' becomes 'myid').
        """
        return self.invalid_cit.sub(u'', text)

    def braceUppercase(self, text):
        """
        Wrap every uppercase letter in braces so that BibTeX keeps its case
        ('Foo Bar' becomes '{F}oo {B}ar').
        """
        return self.upper.sub(lambda m: u'{%s}' % m.group(), text)

    def resolveEntities(self, text):
        """
        Replace the keys of entity_mapping found in text by their values.
        """
        return self.rep_ent.mreplace(text)

    def resolveUnicode(self, text):
        """
        Replace the characters listed in utf8enc2latex_mapping by their LaTeX
        equivalent.
        """
        text = self.rep_utf8.mreplace(text)
        # Remove the '$}{$' sequences left between adjacent replacements
        return text.replace(u'$}{$', u'')

    def escapeSpecialCharacters(self, text):
        """
        LaTeX escaping of some (not all) special characters: ~ # & % _

        Backslashes are deliberately left alone: this runs after
        resolveEntities/resolveUnicode and doubling them here would break the
        LaTeX macros those steps have just inserted. Backslashes of the raw
        text are escaped in utf8ToBibtex.
        """
        return self.escape.sub(lambda m: u'\\%s' % m.group(), text)

    #Calibre functions
    def utf8ToBibtex(self, text):
        """
        Go from an unicode entry to BibTeX format without encoding.

        Produces official ASCII BibTeX when ascii_bibtex is True, unofficial
        UTF-8 BibTeX otherwise. Empty input gives ''.
        """
        if not text:
            return ''
        # str.replace returns a new string, the result has to be kept.
        # Done first so that only backslashes of the raw text are doubled.
        text = text.replace('\\', '\\\\')
        text = self.resolveEntities(text)
        if self.ascii_bibtex:
            text = self.resolveUnicode(text)
        return self.escapeSpecialCharacters(text)

    def bibtex_author_format(self, item):
        """
        Format authors for BibTeX compliance (get a list as input).

        BibTeX separates names with ' and ', spaces included.
        """
        return self.utf8ToBibtex(u' and '.join(item))
class MReplace(UserDict):
    """
    Dictionary that replaces all of its keys by their values in a text, in a
    single pass (see http://code.activestate.com/recipes/81330/).

    The regular expression is built when the object is created; call
    compile_regex() again after changing the mapping.
    """

    def __init__(self, dict = None):
        UserDict.__init__(self, dict)
        # Pattern source and compiled pattern, both None until there are keys
        self.re = None
        self.regex = None
        self.compile_regex()

    def compile_regex(self):
        """
        (Re)build the alternation of all the keys, longest keys first so
        that a key is never hidden by one of its own prefixes.
        """
        if not self.data:
            return
        longest_first = reversed(sorted(self.data.keys(), key=len))
        pattern = "(%s)" % "|".join(re.escape(key) for key in longest_first)
        # Only pay for a compile when the set of keys really changed
        if pattern != self.re:
            self.re = pattern
            self.regex = re.compile(pattern)

    def __call__(self, mo):
        # Used as the replacement callback of regex.sub
        return self[mo.group(0)]

    def mreplace(self, text):
        """
        Return text with every key replaced by its value, using the already
        compiled expression. Text is returned as is when there is nothing to
        replace.
        """
        if self.re is None or not self.data:
            return text
        return self.regex.sub(self, text)
in mapping_of_recipe_classes.items(): + print 11111, u, repr(x.title) + if isinstance(x.title, str): + x.title.decode('ascii') + ''' for urn in sorted(mapping_of_recipe_classes.keys(), key=lambda key: getattr(mapping_of_recipe_classes[key], 'title', 'zzz')):