Merge from trunk

2026-04-25 03:33:39 +02:00 · 2010-11-23 06:35:48 +01:00 · 2010-11-23 06:35:48 +01:00 · 9653087ea0
commit 9653087ea0
parent 3137b37b01 81e05df304
22 changed files with 695 additions and 49 deletions
--- a/resources/recipes/180.recipe
+++ b/resources/recipes/180.recipe
@ -0,0 +1,50 @@
+#!/usr/bin/env  python
+
+__license__   = 'GPL v3'
+__author__ = '2010, Gustavo Azambuja <hola at gazambuja.com>'
+'''
+180.com.uy
+'''
+
+from calibre.web.feeds.news import BasicNewsRecipe
+
+class Noticias(BasicNewsRecipe):
+    title                 = '180.com.uy'
+    __author__            = 'Gustavo Azambuja'
+    description           = 'Noticias de Uruguay'
+    language       = 'es'
+    timefmt        = '[%a, %d %b, %Y]'
+    use_embedded_content  = False
+    recursion             = 5
+    encoding = 'utf-8'
+    remove_javascript = True
+    no_stylesheets = True
+
+    oldest_article        = 2
+    max_articles_per_feed = 100
+    keep_only_tags = [dict(name='div', attrs={'class':'tef-md tef-md-seccion-sociedad'})]
+    remove_tags = [
+             dict(name=['object','link'])
+                  ]
+
+    remove_attributes = ['width','height', 'style', 'font', 'color']
+
+    extra_css = '''
+                h1{font-family:Geneva, Arial, Helvetica, sans-serif;color:#154B7A;}
+                h3{font-size: 14px;color:#999999; font-family:Geneva, Arial, Helvetica, sans-serif;font-weight: bold;}
+                h2{color:#666666; font-family:Geneva, Arial, Helvetica, sans-serif;font-size:small;}
+                p {font-family:Arial,Helvetica,sans-serif;}
+                '''
+    feeds = [
+           (u'Titulares', u'http://www.180.com.uy/feed.php')
+        ]
+
+    def get_cover_url(self):
+		return 'http://www.180.com.uy/tplef/img/logo.gif'
+
+
+    def preprocess_html(self, soup):
+        for item in soup.findAll(style=True):
+            del item['style']
+        return soup
+
--- a/resources/recipes/bitacora.recipe
+++ b/resources/recipes/bitacora.recipe
@ -0,0 +1,58 @@
+#!/usr/bin/env  python
+
+__license__   = 'GPL v3'
+__author__ = '2010, Gustavo Azambuja <hola at gazambuja.com>'
+'''
+bitacora.com.uy
+'''
+
+from calibre.web.feeds.news import BasicNewsRecipe
+
+class General(BasicNewsRecipe):
+    title                 = 'bitacora.com.uy'
+    __author__            = 'Gustavo Azambuja'
+    description           = 'Noticias de Uruguay'
+    language       = 'es'
+    timefmt        = '[%a, %d %b, %Y]'
+    use_embedded_content  = False
+    recursion             = 5
+    encoding = 'iso-8859-1'
+    remove_javascript = True
+    no_stylesheets = True
+
+    oldest_article        = 2
+    max_articles_per_feed = 100
+    keep_only_tags = [dict(id=['txt'])]
+    remove_tags = [
+             dict(name='div', attrs={'class':'tablafoot'}),
+             dict(name=['object','h4']),
+             dict(name=['object','link'])
+                  ]
+
+    remove_attributes = ['width','height', 'style', 'font', 'color']
+
+    extra_css = '''
+                h1{font-family:Geneva, Arial, Helvetica, sans-serif;color:#154B7A;}
+                h3{font-size: 14px;color:#999999; font-family:Geneva, Arial, Helvetica, sans-serif;font-weight: bold;}
+                h2{color:#666666; font-family:Geneva, Arial, Helvetica, sans-serif;font-size:small;}
+                p {font-family:Arial,Helvetica,sans-serif;}
+                '''
+    feeds = [
+           (u'Titulares', u'http://www.bitacora.com.uy/anxml.cgi?15')
+        ]
+
+    def get_cover_url(self):
+	cover_url = None
+	index = 'http://www.bitacora.com.uy'
+	soup = self.index_to_soup(index)
+	link_item = soup.find('img',attrs={'class':'imgtapa'})
+	if link_item:
+		cover_url = "http://www.bitacora.com.uy/"+link_item['src']
+	return cover_url
+
+
+    def preprocess_html(self, soup):
+        for item in soup.findAll(style=True):
+            del item['style']
+        return soup
+
--- a/resources/recipes/cosmopolitan.recipe
+++ b/resources/recipes/cosmopolitan.recipe
@ -0,0 +1,69 @@
+#!/usr/bin/env  python
+
+__license__   = 'GPL v3'
+__author__ = '2010, Gustavo Azambuja <hola at gazambuja.com>'
+'''
+www.cosmohispano.com
+'''
+
+from calibre.web.feeds.news import BasicNewsRecipe
+
+class General(BasicNewsRecipe):
+    title                 = 'Cosmopolitan'
+    __author__            = 'Gustavo Azambuja'
+    description           = 'Revista Cosmopolitan, Edicion Espanola'
+    language       = 'es'
+    timefmt        = '[%a, %d %b, %Y]'
+    use_embedded_content  = False
+    recursion             = 1
+    encoding = 'utf8'
+    remove_javascript = True
+    no_stylesheets = True
+    conversion_options = {'linearize_tables': True}
+
+    oldest_article        = 180
+    max_articles_per_feed = 100
+    keep_only_tags = [
+             dict(id=['contenido']),
+             dict(name='td', attrs={'class':['contentheading', 'txt_articulo']})
+                     ]
+    remove_tags = [
+             dict(name='div', attrs={'class':['breadcrumb', 'bloque1', 'article', 'bajo_title', 'tags_articles', 'otrosenlaces_title', 'otrosenlaces_parent', 'compartir']}),
+             dict(name='div', attrs={'id':'comment'}),
+             dict(name='table', attrs={'class':'pagenav'}),
+             dict(name=['object','link'])
+                  ]
+    remove_attributes = ['width','height', 'style', 'font', 'color']
+
+    extra_css = '''
+                h1{font-family:Geneva, Arial, Helvetica, sans-serif;color:#154B7A;}
+                h3{font-size: 14px;color:#999999; font-family:Geneva, Arial, Helvetica, sans-serif;font-weight: bold;}
+                h2{color:#666666; font-family:Geneva, Arial, Helvetica, sans-serif;font-size:small;}
+                img {float:left; clear:both; margin:10px}
+                p {font-family:Arial,Helvetica,sans-serif;}
+                '''
+    feeds = [
+                  (u'Articulos', u'http://feeds.feedburner.com/cosmohispano/FSSt')
+    ]
+
+    def preprocess_html(self, soup):
+        attribs = [  'style','font','valign'
+                    ,'colspan','width','height'
+                    ,'rowspan','summary','align'
+                    ,'cellspacing','cellpadding'
+                    ,'frames','rules','border'
+                  ]
+        for item in soup.body.findAll(name=['table','td','tr','th','caption','thead','tfoot','tbody','colgroup','col']):
+            item.name = 'div'
+            for attrib in attribs:
+                if item.has_key(attrib):
+                   del item[attrib]
+        return soup
+
+    def get_cover_url(self):
+		index = 'http://www.cosmohispano.com/revista'
+		soup = self.index_to_soup(index)
+		link_item = soup.find('img',attrs={'class':'img_portada'})
+		if link_item:
+			cover_url = "http://www.cosmohispano.com"+link_item['src']
+		return cover_url
--- a/resources/recipes/el_pais_uy.recipe
+++ b/resources/recipes/el_pais_uy.recipe
@ -0,0 +1,67 @@
+#!/usr/bin/env  python
+
+__license__   = 'GPL v3'
+__author__ = '2010, Gustavo Azambuja <hola at gazambuja.com>'
+'''
+http://www.elpais.com.uy/
+'''
+
+from calibre.web.feeds.news import BasicNewsRecipe
+
+class General(BasicNewsRecipe):
+    title                 = 'Diario El Pais'
+    __author__            = 'Gustavo Azambuja'
+    description           = 'Noticias | Uruguay'
+    language       = 'es'
+    timefmt        = '[%a, %d %b, %Y]'
+    use_embedded_content  = False
+    recursion             = 2
+    encoding = 'iso-8859-1'
+    remove_javascript = True
+    no_stylesheets = True
+
+    oldest_article        = 2
+    max_articles_per_feed = 100
+    keep_only_tags = [
+                      dict(name='h1'),
+                      dict(name='div', attrs={'id':'Contenido'})
+                      ]
+    remove_tags = [
+				 dict(name='div', attrs={'class':['date_text', 'comments', 'form_section', 'share_it']}),
+				 dict(name='div', attrs={'id':['relatedPosts', 'spacer', 'banner_izquierda', 'right_container']}),
+				 dict(name='p', attrs={'class':'FacebookLikeButton'}),
+				 dict(name=['object','form']),
+				 dict(name=['object','table']) ]
+
+    extra_css = '''
+                h1{font-family:Geneva, Arial, Helvetica, sans-serif;color:#154B7A;}
+                h3{font-size: 14px;color:#999999; font-family:Geneva, Arial, Helvetica, sans-serif;font-weight: bold;}
+                h2{color:#666666; font-family:Geneva, Arial, Helvetica, sans-serif;font-size:small;}
+                p {font-family:Arial,Helvetica,sans-serif;}
+                '''
+    feeds = [
+           (u'Ultimo Momento', u'http://www.elpais.com.uy/formatos/rss/index.asp?seccion=umomento'),
+           (u'Editorial', u'http://www.elpais.com.uy/formatos/rss/index.asp?seccion=editorial'),
+           (u'Nacional', u'http://www.elpais.com.uy/formatos/rss/index.asp?seccion=nacional'),
+           (u'Internacional', u'http://www.elpais.com.uy/formatos/rss/index.asp?seccion=internacional'),
+           (u'Espectaculos', u'http://www.elpais.com.uy/formatos/rss/index.asp?seccion=espectaculos'),
+           (u'Deportes', u'http://www.elpais.com.uy/formatos/rss/index.asp?seccion=deportes'),
+           (u'Ciudades', u'http://www.elpais.com.uy/formatos/rss/index.asp?seccion=ciudades'),
+           (u'Economia', u'http://www.elpais.com.uy/formatos/rss/index.asp?seccion=economia') 
+        ]
+
+    def get_cover_url(self):
+		cover_url = None
+		index = 'http://www.elpais.com.uy'
+		soup = self.index_to_soup(index)
+		link_item = soup.find('div',attrs={'class':'boxmedio box257'})
+		print link_item
+		if link_item:
+			cover_url = 'http://www.elpais.com.uy'+link_item.img['src']
+		return cover_url
+
+    def preprocess_html(self, soup):
+        for item in soup.findAll(style=True):
+            del item['style']
+        return soup
+
--- a/resources/recipes/freeway.recipe
+++ b/resources/recipes/freeway.recipe
@ -0,0 +1,100 @@
+#!/usr/bin/env  python
+
+__license__   = 'GPL v3'
+__author__ = '2010, Gustavo Azambuja <hola at gazambuja.com>'
+'''
+http://freeway.com.uy
+'''
+
+from calibre.web.feeds.news import BasicNewsRecipe
+
+class General(BasicNewsRecipe):
+    title                 = 'freeway.com.uy'
+    __author__            = 'Gustavo Azambuja'
+    description           = 'Revista Freeway, Montevideo, Uruguay'
+    language       = 'es'
+    timefmt        = '[%a, %d %b, %Y]'
+    use_embedded_content  = False
+    recursion             = 1
+    encoding = 'utf8'
+    remove_javascript = True
+    no_stylesheets = True
+    conversion_options = {'linearize_tables': True}
+
+    oldest_article        = 180
+    max_articles_per_feed = 100
+    keep_only_tags = [
+             dict(id=['contenido']),
+             dict(name='a', attrs={'class':'titulo_art_ppal'}),
+             dict(name='img', attrs={'class':'recuadro'}),
+             dict(name='td', attrs={'class':'txt_art_ppal'})
+                     ]
+    remove_tags = [
+             dict(name=['object','link'])
+                  ]
+    remove_attributes = ['width','height', 'style', 'font', 'color']
+
+    extra_css = '''
+                h1{font-family:Geneva, Arial, Helvetica, sans-serif;color:#154B7A;}
+                h3{font-size: 14px;color:#999999; font-family:Geneva, Arial, Helvetica, sans-serif;font-weight: bold;}
+                h2{color:#666666; font-family:Geneva, Arial, Helvetica, sans-serif;font-size:small;}
+                img {float:left; clear:both; margin:10px}
+                p {font-family:Arial,Helvetica,sans-serif;}
+                '''
+
+    def parse_index(self):
+            feeds = []
+            for title, url in [('Articulos', 'http://freeway.com.uy/revista/')]:
+               articles = self.art_parse_section(url)
+               if articles:
+                   feeds.append((title, articles))
+            return feeds
+
+    def art_parse_section(self, url):
+            soup = self.index_to_soup(url)
+            div = soup.find(attrs={'id': 'tbl_1'})
+
+            current_articles = []
+            for tag in div.findAllNext(attrs = {'class': 'ancho_articulos'}):
+                if tag.get('class') == 'link-list-heading':
+                    break
+                for td in tag.findAll('td'):
+                    a = td.find('a', attrs= {'class': 'titulo_articulos'})
+                    if a is None:
+                        continue
+                    title = self.tag_to_string(a)
+                    url = a.get('href', False)
+                    if not url or not title:
+                        continue
+                    if url.startswith('/'):
+                         url = 'http://freeway.com.uy'+url
+                    p = td.find('p', attrs= {'class': 'txt_articulos'})
+                    description = self.tag_to_string(p)
+                    self.log('\t\tFound article:', title)
+                    self.log('\t\t\t', url)
+                    self.log('\t\t\t', description)
+                    current_articles.append({'title': title, 'url': url, 'description':description, 'date':''})
+
+            return current_articles
+
+    def preprocess_html(self, soup):
+        attribs = [  'style','font','valign'
+                    ,'colspan','width','height'
+                    ,'rowspan','summary','align'
+                    ,'cellspacing','cellpadding'
+                    ,'frames','rules','border'
+                  ]
+        for item in soup.body.findAll(name=['table','td','tr','th','caption','thead','tfoot','tbody','colgroup','col']):
+            item.name = 'div'
+            for attrib in attribs:
+                if item.has_key(attrib):
+                   del item[attrib]
+        return soup
+
+    def get_cover_url(self):
+		#index = 'http://www.cosmohispano.com/revista'
+		#soup = self.index_to_soup(index)
+		#link_item = soup.find('img',attrs={'class':'img_portada'})
+		#if link_item:
+		#	cover_url = "http://www.cosmohispano.com"+link_item['src']
+		return 'http://freeway.com.uy/_upload/_n_foto_grande/noticia_1792_tapanoviembre2010.jpg'
--- a/resources/recipes/la_diaria.recipe
+++ b/resources/recipes/la_diaria.recipe
@ -0,0 +1,48 @@
+#!/usr/bin/env  python
+
+__license__   = 'GPL v3'
+__author__ = '2010, Gustavo Azambuja <hola at gazambuja.com>'
+'''
+ladiaria.com.uy
+'''
+
+from calibre.web.feeds.news import BasicNewsRecipe
+
+class General(BasicNewsRecipe):
+    title                 = 'La Diaria'
+    __author__            = 'Gustavo Azambuja'
+    description           = 'Noticias de Uruguay'
+    language       = 'es'
+    timefmt        = '[%a, %d %b, %Y]'
+    use_embedded_content  = False
+    recursion             = 5
+    encoding = 'utf8'
+    remove_javascript = True
+    no_stylesheets = True
+
+    oldest_article        = 2
+    max_articles_per_feed = 100
+    keep_only_tags = [dict(id=['article'])]
+    remove_tags = [
+             dict(name='div', attrs={'class':['byline', 'hr', 'titlebar', 'volver-arriba-right']}),
+             dict(name='div', attrs={'id':'discussion'}),
+             dict(name=['object','link'])
+                  ]
+
+    extra_css = '''
+                h1{font-family:Geneva, Arial, Helvetica, sans-serif;color:#154B7A;}
+                h3{font-size: 14px;color:#999999; font-family:Geneva, Arial, Helvetica, sans-serif;font-weight: bold;}
+                h2{color:#666666; font-family:Geneva, Arial, Helvetica, sans-serif;font-size:small;}
+                p {font-family:Arial,Helvetica,sans-serif;}
+                '''
+    feeds = [
+           (u'Articulos', u'http://ladiaria.com/feeds/articulos')
+        ]
+
+    def get_cover_url(self):
+        return 'http://ladiaria.com/edicion/imagenportada/'
+
+    def preprocess_html(self, soup):
+        for item in soup.findAll(style=True):
+            del item['style']
+        return soup
--- a/resources/recipes/la_razon_bo.recipe
+++ b/resources/recipes/la_razon_bo.recipe
@ -8,7 +8,7 @@
 from calibre.web.feeds.news import BasicNewsRecipe

 class LaRazon_Bol(BasicNewsRecipe):
-    title                 = 'La Razón - Bolivia'
+    title                 = u'La Razón - Bolivia'
    __author__            = 'Darko Miletic'
    description           = 'El diario nacional de Bolivia'
    publisher             = 'Praxsis S.R.L.'
--- a/resources/recipes/montevideo_com.recipe
+++ b/resources/recipes/montevideo_com.recipe
@ -0,0 +1,56 @@
+#!/usr/bin/env  python
+
+__license__   = 'GPL v3'
+__author__ = '2010, Gustavo Azambuja <hola at gazambuja.com>'
+'''
+http://www.montevideo.com.uy
+'''
+
+from calibre.web.feeds.news import BasicNewsRecipe
+
+class Noticias(BasicNewsRecipe):
+    title                 = 'Montevideo COMM'
+    __author__            = 'Gustavo Azambuja'
+    description           = 'Noticias de Uruguay'
+    language       = 'es'
+    timefmt        = '[%a, %d %b, %Y]'
+    use_embedded_content  = False
+    recursion             = 5
+    encoding = 'utf-8'
+    remove_javascript = True
+    no_stylesheets = True
+
+    oldest_article        = 2
+    max_articles_per_feed = 100
+    keep_only_tags = [dict(id=['txt'])]
+    remove_tags = [
+             dict(name=['object','link'])
+                  ]
+
+    remove_attributes = ['width','height', 'style', 'font', 'color']
+
+    extra_css = '''
+                h1{font-family:Geneva, Arial, Helvetica, sans-serif;color:#154B7A;}
+                h3{font-size: 14px;color:#999999; font-family:Geneva, Arial, Helvetica, sans-serif;font-weight: bold;}
+                h2{color:#666666; font-family:Geneva, Arial, Helvetica, sans-serif;font-size:small;}
+                p {font-family:Arial,Helvetica,sans-serif;}
+                '''
+    feeds = [
+           (u'Destacados', u'http://www.montevideo.com.uy/anxml.aspx?58'),
+           (u'Noticias', u'http://www.montevideo.com.uy/anxml.aspx?59'),
+           (u'Tecnologia', u'http://www.montevideo.com.uy/anxml.aspx?133'),
+           (u'Tiempo Libre', u'http://www.montevideo.com.uy/anxml.aspx?60'),
+           # (u'Deportes', u'http://www.montevideo.com.uy/anxml.aspx?968'),
+           # (u'Pantallazo', u'http://www.montevideo.com.uy/anxml.aspx?1022'),
+           (u'Gastronomia', u'http://www.montevideo.com.uy/anxml.aspx?1023')
+        ]
+
+    def get_cover_url(self):
+		return 'http://sphotos.ak.fbcdn.net/hphotos-ak-snc1/hs276.snc1/10319_147339559330_147337559330_2625816_6636564_n.jpg'
+
+
+    def preprocess_html(self, soup):
+        for item in soup.findAll(style=True):
+            del item['style']
+        return soup
+
--- a/resources/recipes/observa_digital.recipe
+++ b/resources/recipes/observa_digital.recipe
@ -0,0 +1,63 @@
+#!/usr/bin/env  python
+
+__license__   = 'GPL v3'
+__author__ = '2010, Gustavo Azambuja <hola at gazambuja.com>'
+'''
+observa.com.uy
+'''
+
+from calibre.web.feeds.news import BasicNewsRecipe
+
+class Noticias(BasicNewsRecipe):
+    title                 = 'Observa Digital'
+    __author__ = '2010, Gustavo Azambuja <hola at gazambuja.com>'
+    description           = 'Noticias desde Uruguay'
+    language       = 'es'
+    timefmt        = '[%a, %d %b, %Y]'
+    use_embedded_content  = False
+    recursion             = 5
+    encoding = 'utf8'
+    remove_javascript = True
+    no_stylesheets = True
+
+    oldest_article        = 2
+    max_articles_per_feed = 100
+    keep_only_tags = [dict(id=['contenido'])]
+    remove_tags = [
+             dict(name='div', attrs={'id':'contenedorVinculadas'}),
+             dict(name='p', attrs={'id':'nota_firma'}),
+             dict(name=['object','link'])
+                  ]
+
+    remove_attributes = ['width','height', 'style', 'font', 'color']
+
+    extra_css = '''
+                h1{font-family:Geneva, Arial, Helvetica, sans-serif;color:#154B7A;}
+                h3{font-size: 14px;color:#999999; font-family:Geneva, Arial, Helvetica, sans-serif;font-weight: bold;}
+                h2{color:#666666; font-family:Geneva, Arial, Helvetica, sans-serif;font-size:small;}
+                p {font-family:Arial,Helvetica,sans-serif;}
+                '''
+    feeds = [
+           (u'Actualidad', u'http://www.observa.com.uy/RSS/actualidad.xml'),
+           (u'Deportes', u'http://www.observa.com.uy/RSS/deportes.xml'),
+           (u'Vida', u'http://www.observa.com.uy/RSS/vida.xml'),
+           (u'Ciencia y Tecnologia', u'http://www.observa.com.uy/RSS/ciencia.xml')
+        ]
+
+    def get_cover_url(self):
+		cover_url = None
+		index = 'http://www.elobservador.com.uy/elobservador/nav_portada.asp?suplemento=dia'
+		soup = self.index_to_soup(index)
+		link_item = soup.find('img',attrs={'usemap':'#mapeo_imagenes'})
+		if link_item:
+			cover_url = 'http://www.elobservador.com.uy'+link_item['src'].strip()
+
+		print cover_url
+
+		return cover_url
+
+
+    def preprocess_html(self, soup):
+        for item in soup.findAll(style=True):
+            del item['style']
+        return soup
--- a/resources/recipes/revista_bla.recipe
+++ b/resources/recipes/revista_bla.recipe
@ -0,0 +1,54 @@
+#!/usr/bin/env  python
+
+__license__   = 'GPL v3'
+__author__ = '2010, Gustavo Azambuja <hola at gazambuja.com>'
+'''
+http://www.revistabla.com
+'''
+
+from calibre.web.feeds.news import BasicNewsRecipe
+
+class Noticias(BasicNewsRecipe):
+    title                 = 'Revista Bla'
+    __author__            = 'Gustavo Azambuja'
+    description           = 'Moda | Uruguay'
+    language       = 'es'
+    timefmt        = '[%a, %d %b, %Y]'
+    use_embedded_content  = False
+    recursion             = 5
+    encoding = 'utf8'
+    remove_javascript = True
+    no_stylesheets = True
+
+    oldest_article        = 20
+    max_articles_per_feed = 100
+    keep_only_tags = [dict(id=['body_container'])]
+    remove_tags = [
+				 dict(name='div', attrs={'class':['date_text', 'comments', 'form_section', 'share_it']}),
+				 dict(name='div', attrs={'id':['relatedPosts', 'spacer', 'banner_izquierda', 'right_container']}),
+				 dict(name='p', attrs={'class':'FacebookLikeButton'}),
+				 dict(name=['object','link']) ]
+
+    extra_css = '''
+                h1{font-family:Geneva, Arial, Helvetica, sans-serif;color:#154B7A;}
+                h3{font-size: 14px;color:#999999; font-family:Geneva, Arial, Helvetica, sans-serif;font-weight: bold;}
+                h2{color:#666666; font-family:Geneva, Arial, Helvetica, sans-serif;font-size:small;}
+                p {font-family:Arial,Helvetica,sans-serif;}
+                '''
+    feeds = [
+           (u'Articulos', u'http://www.revistabla.com/feed/')
+        ]
+
+    def get_cover_url(self):
+	cover_url = None
+	index = 'http://www.revistabla.com'
+	soup = self.index_to_soup(index)
+	link_item = soup.find('div',attrs={'class':'header_right'})
+	if link_item:
+		cover_url = link_item.img['src']
+	return cover_url
+
+    def preprocess_html(self, soup):
+        for item in soup.findAll(style=True):
+            del item['style']
+        return soup
--- a/resources/recipes/revista_muy.recipe
+++ b/resources/recipes/revista_muy.recipe
@ -108,3 +108,10 @@ def parse_index(self):
                   feeds.append((title, articles))
            return feeds

+    def get_cover_url(self):
+        index = 'http://www.muyinteresante.es/revista'
+        soup = self.index_to_soup(index)
+        link_item = soup.find('img',attrs={'class':'img_portada'})
+        if link_item:
+            cover_url = "http://www.muyinteresante.es"+link_item['src']
+        return cover_url
--- a/resources/recipes/telepolis.recipe
+++ b/resources/recipes/telepolis.recipe
@ -3,12 +3,12 @@
 __license__   = 'GPL v3'
 __copyright__ = '2009, Gerhard Aigner <gerhard.aigner at gmail.com>'

-''' http://www.derstandard.at - Austrian Newspaper '''
+
 import re
 from calibre.web.feeds.news import BasicNewsRecipe

 class TelepolisNews(BasicNewsRecipe):
-    title          = u'Telepolis (News)'
+    title          = u'Telepolis (News+Artikel)'
    __author__ = 'Gerhard Aigner'
    publisher = 'Heise Zeitschriften Verlag GmbH & Co KG'
    description = 'News from telepolis'
@ -20,16 +20,16 @@ class TelepolisNews(BasicNewsRecipe):
    encoding = "utf-8"
    language = 'de_AT'

-    use_embedded_content = False
+    use_embedded_content =False
    remove_empty_feeds = True

    preprocess_regexps = [(re.compile(r'<a[^>]*>', re.DOTALL|re.IGNORECASE), lambda match: ''),
        (re.compile(r'</a>', re.DOTALL|re.IGNORECASE), lambda match: ''),]

-    keep_only_tags = [dict(name = 'table',attrs={'class':'blogtable'})]
-    remove_tags = [dict(name='img'), dict(name='td',attrs={'class':'blogbottom'})]
+    keep_only_tags = [dict(name = 'td',attrs={'class':'bloghead'}),dict(name = 'td',attrs={'class':'blogfliess'})]
+    remove_tags = [dict(name='img'), dict(name='td',attrs={'class':'blogbottom'}), dict(name='td',attrs={'class':'forum'})]

-    feeds          = [(u'News', u'http://www.heise.de/tp/news.rdf')]
+    feeds          = [(u'News', u'http://www.heise.de/tp/news-atom.xml')]

    html2lrf_options = [
        '--comment'  , description
@ -41,7 +41,7 @@ class TelepolisNews(BasicNewsRecipe):

    def get_article_url(self, article):
        '''if the linked article is of kind artikel don't take it'''
-        if (article.link.count('artikel') > 0) :
+        if (article.link.count('artikel') > 1) :
            return None
        return article.link

@ -49,3 +49,5 @@ def preprocess_html(self, soup):
        mtag = '<meta http-equiv="Content-Type" content="text/html; charset=' + self.encoding + '">'
        soup.head.insert(0,mtag)
        return soup
+
+
--- a/setup/installer/windows/freeze.py
+++ b/setup/installer/windows/freeze.py
@ -132,7 +132,7 @@ def ignore_lib(root, items):
        shutil.copytree(self.j(comext, 'shell'), self.j(sp_dir, 'win32com', 'shell'))
        shutil.rmtree(comext)

-        for pat in (r'numpy', r'PyQt4\uic\port_v3'):
+        for pat in (r'PyQt4\uic\port_v3', ):
            x = glob.glob(self.j(self.lib_dir, 'site-packages', pat))[0]
            shutil.rmtree(x)

--- a/setup/installer/windows/notes.rst
+++ b/setup/installer/windows/notes.rst
@ -19,7 +19,7 @@ Set CMAKE_PREFIX_PATH environment variable to C:\cygwin\home\kovid\sw

 This is where all dependencies will be installed.

-Add C:\Python26\Scripts and C:\Python26 to PATH 
+Add C:\Python27\Scripts and C:\Python27 to PATH 

 Install setuptools from http://pypi.python.org/pypi/setuptools
 If there are no windows binaries already compiled for the version of python you are using then download the source and run the following command in the folder where the source has been unpacked::
@ -28,7 +28,7 @@ If there are no windows binaries already compiled for the version of python you

 Run the following command to install python dependencies::

-    easy_install --always-unzip -U ipython mechanize pyreadline python-dateutil dnspython cssutils clientform
+    easy_install --always-unzip -U ipython mechanize pyreadline python-dateutil dnspython cssutils clientform pycrypto

 Install BeautifulSoup 3.0.x manually into site-packages (3.1.x parses broken HTML very poorly)

--- a/src/calibre/devices/kobo/driver.py
+++ b/src/calibre/devices/kobo/driver.py
@ -229,6 +229,10 @@ def delete_via_sql(self, ContentID, ContentType):
        #Delete the volume_shortcovers second
        cursor.execute('delete from volume_shortcovers where volumeid = ?', t)

+        # Delete the rows from content_keys
+        if self.dbversion >= 8:
+            cursor.execute('delete from content_keys where volumeid = ?', t)
+
        # Delete the chapters associated with the book next
        t = (ContentID,ContentID,)
        cursor.execute('delete from content where BookID  = ? or ContentID = ?', t)
--- a/src/calibre/ebooks/metadata/isbndb.py
+++ b/src/calibre/ebooks/metadata/isbndb.py
@ -140,7 +140,7 @@ def create_books(opts, args, timeout=5.):
    
    tans = [ISBNDBMetadata(book) for book in fetch_metadata(url, timeout=timeout)]
    #remove duplicates ISBN
-    return dict((book.isbn, book) for book in tans).values()
+    return list(dict((book.isbn, book) for book in tans).values())

 def main(args=sys.argv):
    parser = option_parser()
--- a/src/calibre/library/init.py
+++ b/src/calibre/library/init.py
@ -6,3 +6,53 @@ def db(path=None):
    from calibre.library.database2 import LibraryDatabase2
    from calibre.utils.config import prefs
    return LibraryDatabase2(path if path else prefs['library_path'])
+
+
+def generate_test_db(library_path,
+        num_of_records=20000,
+        num_of_authors=6000,
+        num_of_tags=10000,
+        tag_length=7,
+        author_length=7,
+        title_length=10,
+        max_authors=10,
+        max_tags=10
+        ):
+    import random, string, os, sys, time
+
+    if not os.path.exists(library_path):
+        os.makedirs(library_path)
+
+    def randstr(length):
+        return ''.join(random.choice(string.letters) for i in
+                xrange(length))
+
+    all_tags = [randstr(tag_length) for j in xrange(num_of_tags)]
+    print 'Generated', num_of_tags, 'tags'
+    all_authors = [randstr(author_length) for j in xrange(num_of_authors)]
+    print 'Generated', num_of_authors, 'authors'
+    all_titles = [randstr(title_length) for j in xrange(num_of_records)]
+    print 'Generated', num_of_records, 'titles'
+
+    testdb = db(library_path)
+
+    print 'Creating', num_of_records, 'records...'
+
+    start = time.time()
+
+    for i, title in enumerate(all_titles):
+        print i+1,
+        sys.stdout.flush()
+        authors = random.randint(1, max_authors)
+        authors = [random.choice(all_authors) for i in xrange(authors)]
+        tags = random.randint(0, max_tags)
+        tags = [random.choice(all_tags) for i in xrange(tags)]
+        from calibre.ebooks.metadata.book.base import Metadata
+        mi = Metadata(title, authors)
+        mi.tags = tags
+        testdb.import_book(mi, [])
+
+    t = time.time() - start
+    print '\nGenerated', num_of_records, 'records in:', t, 'seconds'
+    print 'Time per record:', t/float(num_of_records)
+
--- a/src/calibre/library/catalog.py
+++ b/src/calibre/library/catalog.py
@ -405,10 +405,7 @@ def tpl_replace(objtplname) :
            else :
                template_citation = u'%s' % str(entry["id"])

-            if asccii_bibtex :
-                return bibtexclass.ValidateCitationKey(template_citation.encode('ascii', 'replace'))
-            else :
-                return bibtexclass.ValidateCitationKey(template_citation)
+            return bibtexclass.ValidateCitationKey(template_citation)

        self.fmt = path_to_output.rpartition('.')[2]
        self.notification = notification
@ -478,7 +475,7 @@ def tpl_replace(objtplname) :

        #Initialize BibTeX class
        bibtexc = BibTeX()
-        
+
        #Entries writing after Bibtex formating (or not)
        if bibfile_enc != 'ascii' :
            bibtexc.ascii_bibtex = False
--- a/src/calibre/library/database2.py
+++ b/src/calibre/library/database2.py
@ -1248,15 +1248,20 @@ def doit(func, *args, **kwargs):
                    traceback.print_exc()
                else:
                    raise
+        path_changed = False
        if set_title and mi.title:
-            self.set_title(id, mi.title, commit=False)
+            self._set_title(id, mi.title)
+            path_changed = True
        if set_authors:
            if not mi.authors:
                    mi.authors = [_('Unknown')]
            authors = []
            for a in mi.authors:
                authors += string_to_authors(a)
-            self.set_authors(id, authors, notify=False, commit=False)
+            self._set_authors(id, authors)
+            path_changed = True
+        if path_changed:
+            self.set_path(id, index_is_id=True)
        if mi.author_sort:
            doit(self.set_author_sort, id, mi.author_sort, notify=False,
                    commit=False)
@ -1348,13 +1353,7 @@ def author_sort_from_authors(self, authors):
                result.append(r)
        return ' & '.join(result).replace('|', ',')

-    def set_authors(self, id, authors, notify=True, commit=True):
-        '''
-        Note that even if commit is False, the db will still be committed to
-        because this causes the location of files to change
-
-        :param authors: A list of authors.
-        '''
+    def _set_authors(self, id, authors):
        if not authors:
            authors = [_('Unknown')]
        self.conn.execute('DELETE FROM books_authors_link WHERE book=?',(id,))
@ -1379,25 +1378,30 @@ def set_authors(self, id, authors, notify=True, commit=True):
        ss = self.author_sort_from_book(id, index_is_id=True)
        self.conn.execute('UPDATE books SET author_sort=? WHERE id=?',
                          (ss, id))
-        self.dirtied([id], commit=False)
-        if commit:
-            self.conn.commit()
        self.data.set(id, self.FIELD_MAP['authors'],
                      ','.join([a.replace(',', '|') for a in authors]),
                      row_is_id=True)
        self.data.set(id, self.FIELD_MAP['author_sort'], ss, row_is_id=True)
+
+    def set_authors(self, id, authors, notify=True, commit=True):
+        '''
+        Set the authors of the book ``id`` and then update its path.
+
+        Note that even if commit is False, the db will still be committed to
+        because this causes the location of files to change
+
+        :param id: Id of the book whose authors are being set.
+        :param authors: A list of authors.
+        :param notify: If True, send a 'metadata' notification for this book.
+        :param commit: If True, commit the connection before the path update.
+        '''
+        self._set_authors(id, authors)
+        self.dirtied([id], commit=False)
+        if commit:
+            self.conn.commit()
        self.set_path(id, index_is_id=True)
        if notify:
            self.notify('metadata', [id])

-    def set_title(self, id, title, notify=True, commit=True):
-        '''
-        Note that even if commit is False, the db will still be committed to
-        because this causes the location of files to change
-        '''
+    def _set_title(self, id, title):
        if not title:
-            return
-        if not isinstance(title, unicode):
+            return False
+        if isbytestring(title):
            title = title.decode(preferred_encoding, 'replace')
        self.conn.execute('UPDATE books SET title=? WHERE id=?', (title, id))
        self.data.set(id, self.FIELD_MAP['title'], title, row_is_id=True)
@ -1405,6 +1409,15 @@ def set_title(self, id, title, notify=True, commit=True):
            self.data.set(id, self.FIELD_MAP['sort'], title_sort(title), row_is_id=True)
        else:
            self.data.set(id, self.FIELD_MAP['sort'], title, row_is_id=True)
+        return True
+
+    def set_title(self, id, title, notify=True, commit=True):
+        '''
+        Note that even if commit is False, the db will still be committed to
+        because this causes the location of files to change
+        '''
+        if not self._set_title(id, title):
+            return
        self.set_path(id, index_is_id=True)
        self.dirtied([id], commit=False)
        if commit:
@ -2072,13 +2085,11 @@ def create_book_entry(self, mi, cover=None, add_duplicates=True,
                                (id, title, series_index, aus))

        self.data.books_added([id], self)
-        self.set_path(id, True)
-        self.conn.commit()
        if mi.timestamp is None:
            mi.timestamp = utcnow()
        if mi.pubdate is None:
            mi.pubdate = utcnow()
-        self.set_metadata(id, mi, ignore_errors=True)
+        self.set_metadata(id, mi, ignore_errors=True, commit=True)
        if cover is not None:
            try:
                self.set_cover(id, cover)
@ -2114,13 +2125,11 @@ def add_books(self, paths, formats, metadata, add_duplicates=True):
            id = obj.lastrowid
            self.data.books_added([id], self)
            ids.append(id)
-            self.set_path(id, True)
-            self.conn.commit()
            if mi.timestamp is None:
                mi.timestamp = utcnow()
            if mi.pubdate is None:
                mi.pubdate = utcnow()
-            self.set_metadata(id, mi)
+            self.set_metadata(id, mi, commit=True, ignore_errors=True)
            npath = self.run_import_plugins(path, format)
            format = os.path.splitext(npath)[-1].lower().replace('.', '').upper()
            stream = lopen(npath, 'rb')
@ -2154,12 +2163,11 @@ def import_book(self, mi, formats, notify=True, import_hooks=True,
                          (title, series_index, aus))
        id = obj.lastrowid
        self.data.books_added([id], self)
-        self.set_path(id, True)
        if mi.timestamp is None:
            mi.timestamp = utcnow()
        if mi.pubdate is None:
            mi.pubdate = utcnow()
-        self.set_metadata(id, mi, ignore_errors=True)
+        self.set_metadata(id, mi, ignore_errors=True, commit=True)
        if preserve_uuid and mi.uuid:
            self.set_uuid(id, mi.uuid, commit=False)
        for path in formats:
--- a/src/calibre/startup.py
+++ b/src/calibre/startup.py
@ -129,7 +129,7 @@ def __init__(self, name, fobject):

                def __getattribute__(self, attr):
+                    # Names listed here are looked up on the wrapper itself;
+                    # every other attribute is forwarded to the wrapped fobject.
                    if attr in ('name', '__enter__', '__str__', '__unicode__',
-                            '__repr__'):
+                            '__repr__', '__exit__'):
                        return object.__getattribute__(self, attr)
                    fobject = object.__getattribute__(self, 'fobject')
                    return getattr(fobject, attr)
@ -155,6 +155,11 @@ def __enter__(self):
                    fobject.__enter__()
                    return self

+                def __exit__(self, *args):
+                    # Delegate leaving the ``with`` block to the wrapped file
+                    # object and return its result. object.__getattribute__ is
+                    # used because this class forwards ordinary attribute
+                    # lookups to fobject.
+                    fobject = object.__getattribute__(self, 'fobject')
+                    return fobject.__exit__(*args)
+
+
            m = mode[0]
            random = len(mode) > 1 and mode[1] == '+'
            binary = mode[-1] == 'b'
--- a/src/calibre/utils/bibtex.py
+++ b/src/calibre/utils/bibtex.py
@ -69,6 +69,9 @@
 from calibre.constants import preferred_encoding
 from calibre.utils.mreplace import MReplace

+from calibre.constants import preferred_encoding
+from calibre.utils.mreplace import MReplace
+
 utf8enc2latex_mapping = {
    # This is a mapping of Unicode characters to LaTeX equivalents.
    # The information has been extracted from
@ -2465,7 +2468,7 @@
    u'\U0001d7fd': '$\\mathtt{7}$',
    u'\U0001d7fe': '$\\mathtt{8}$',
    u'\U0001d7ff': '$\\mathtt{9}$',
-    
+
    #Items from simple list
    u'\u0106': "{\\a\\'C}",
    u'\u0408': '{\\CYRJE}',
@ -2853,10 +2856,10 @@ def __init__(self):
        # This substitution is based on the description of cite key restrictions at
        # http://bibdesk.sourceforge.net/manual/BibDesk%20Help_2.html
        self.invalid_cit = re.compile(u'[ "@\',\\#}{~%&$^]')
-        self.upper = re.compile(u'[' + 
+        self.upper = re.compile(u'[' +
            string.uppercase.decode(preferred_encoding) + u']')
        self.escape = re.compile(u'[~#&%_]')
-        
+
    def ValidateCitationKey(self, text):
        """
        removes characters not allowed in BibTeX keys
@ -2893,7 +2896,7 @@ def escapeSpecialCharacters(self, text):
        return self.escape.sub(lambda m: u'\\%s' % m.group(), text)

    #Calibre functions
-    #Option to go to official ASCII Bibtex or unofficial UTF-8     
+    #Option to go to official ASCII Bibtex or unofficial UTF-8
    #Go from an unicode entry to ASCII Bibtex format without encoding
    def utf8ToBibtex(self, text):
        if len(text) == 0:
--- a/src/calibre/web/feeds/recipes/collection.py
+++ b/src/calibre/web/feeds/recipes/collection.py
@ -61,6 +61,11 @@ def attr(n, d):

 def serialize_collection(mapping_of_recipe_classes):
    collection = E.recipe_collection()
+    '''for u, x in mapping_of_recipe_classes.items():
+        print 11111, u, repr(x.title)
+        if isinstance(x.title, str):
+            x.title.decode('ascii')
+    '''
    for urn in sorted(mapping_of_recipe_classes.keys(),
            key=lambda key: getattr(mapping_of_recipe_classes[key], 'title',
                'zzz')):