Chip 2011 November

home *** CD-ROM | disk | FTP | other *** search

/ Chip 2011 November / CHIP_2011_11.iso / Programy / Narzedzia / Calibre / calibre-0.8.18.msi / file_280 / eluniversalimpresa.recipe < prev next >

Wrap

Text File | 2011-09-09 | 3KB | 83 lines

from calibre.web.feeds.news import BasicNewsRecipe


class ElUniversalImpresaRecipe(BasicNewsRecipe):
    """Recipe for the print edition of the Mexican newspaper El Universal.

    The feed index is scraped from the print-edition overview page, articles
    are fetched through their ``vi_`` URL variant (see ``print_version``) and
    the downloaded HTML is cleaned up in ``preprocess_html``.
    """

    __license__ = 'GPL v3'
    __author__ = 'kwetal'
    language = 'es_MX'
    version = 1

    title = u'El Universal (Edici\u00F3n Impresa)'
    publisher = u'El Universal'
    category = u'News, Mexico'
    description = u'News from Mexico'

    remove_empty_feeds = True
    remove_javascript = True

    # Site root; article hrefs found on the index page are appended to it.
    INDEX = 'http://www.eluniversal.com.mx'

    extra_css = '''
                    body{font-family:verdana,arial,helvetica,geneva,sans-serif;}
                '''

    # NOTE(review): 'language' is 'en' here although the recipe itself is
    # declared as 'es_MX' above -- confirm whether this is intentional.
    conversion_options = {'comments': description, 'tags': category, 'language': 'en',
                          'publisher': publisher, 'linearize_tables': True}

    def parse_index(self):
        """Build the list of feeds from the print-edition overview page.

        Returns a list of ``(section title, articles)`` tuples.  Every article
        is a dict with the keys ``title``, ``date``, ``url`` and
        ``description``.

        Raises ValueError if the table that holds the index cannot be found,
        instead of failing later with an opaque error on ``None``.
        """
        soup = self.index_to_soup('http://www.eluniversal.com.mx/edicion_impresa.html')

        table = soup.find('table', attrs={'width': '500'})
        if table is None:
            raise ValueError('El Universal: index table (width=500) not found')

        index = []

        # Front page: cells with class "arnegro12" hold a headline link
        # followed by the teaser text.
        front_page = []
        for td in table.findAll(lambda tag: tag.name == 'td' and
                                tag.get('class') == 'arnegro12'):
            link = td.a
            if link is None or not link.get('href'):
                # No usable link in this cell; nothing to download.
                continue

            # Detach the link first so that the text left in the cell is only
            # the teaser, which becomes the description.
            link.extract()
            front_page.append({'title': self.tag_to_string(link),
                               'date': None,
                               'url': self.INDEX + link['href'],
                               'description': self.tag_to_string(td)})

        index.append(('Primera Plana', front_page))

        # Other sections: every attribute-less cell is one section.  Its first
        # link with non-empty text gives the section title, the links after
        # it are the articles.
        for td in table.findAll(lambda tag: tag.name == 'td' and len(tag.attrs) == 0):
            section_title = None
            section_articles = []
            for link in td.findAll('a'):
                if not section_title:
                    section_title = self.tag_to_string(link)
                    continue

                href = link.get('href')
                if not href:
                    continue

                section_articles.append({'title': self.tag_to_string(link),
                                         'date': None,
                                         'url': self.INDEX + href,
                                         'description': ''})

            # Cells without article links would only yield an untitled, empty
            # feed; remove_empty_feeds is set on this recipe, so such feeds
            # are presumably discarded downstream anyway.
            if section_articles:
                index.append((section_title, section_articles))

        return index

    def print_version(self, url):
        """Return the URL to download for the article at ``url``.

        URLs containing ``wcarton`` yield ``None``.  For every other URL the
        last path component is prefixed with ``vi_``.
        """
        if 'wcarton' in url:
            return None

        main, _, article_id = url.rpartition('/')

        return main + '/vi_' + article_id

    def preprocess_html(self, soup):
        """Clean up a downloaded article and return the modified ``soup``.

        Removes the first table of the page (when there is one) and all
        paragraphs without text, then turns the ``<font color="#0F046A">``
        element into an ``<h1>``.
        """
        table = soup.find('table')
        if table is not None:
            table.extract()

        for p in soup.findAll('p'):
            if self.tag_to_string(p).strip() == '':
                p.extract()

        headline = soup.find('font', attrs={'color': '#0F046A'})
        if headline is not None:
            # 'helvetica,' and 'sans-serif' are presumably stray attributes
            # produced by an unquoted face=... value -- TODO confirm.
            for attr in ['color', 'face', 'helvetica,', 'sans-serif', 'size']:
                if headline.get(attr) is not None:
                    del headline[attr]
            headline.name = 'h1'

        return soup