Chip 2011 November

home *** CD-ROM | disk | FTP | other *** search

/ Chip 2011 November / CHIP_2011_11.iso / Programy / Narzedzia / Calibre / calibre-0.8.18.msi / file_280 / novistandard.recipe < prev next >

Wrap

Text File | 2011-09-09 | 5KB | 101 lines

__license__ = 'GPL v3'
__copyright__ = '2011, Darko Miletic <darko.miletic at gmail.com>'
'''
www.standard.rs
'''

import re
from calibre.web.feeds.news import BasicNewsRecipe


class NoviStandard(BasicNewsRecipe):
    """Calibre recipe that downloads articles from Novi Standard (www.standard.rs).

    Articles are collected from the site's RSS feeds. A login is attempted
    only when both a username and a password have been supplied
    (``needs_subscription`` is ``'optional'``).
    """

    title = 'Novi Standard'
    __author__ = 'Darko Miletic'
    description = 'NoviStandard - energija je neunistiva!'
    publisher = 'Novi Standard'
    category = 'news, politics, Serbia'
    no_stylesheets = True
    delay = 1
    oldest_article = 15
    encoding = 'utf-8'
    publication_type = 'magazine'
    needs_subscription = 'optional'
    remove_empty_feeds = True
    # Front page of the site; opened by get_browser() before the login form
    # is looked up.
    INDEX = 'http://www.standard.rs/'
    use_embedded_content = False
    language = 'sr'
    masthead_url = 'http://www.standard.rs/templates/ja_opal/images/red/logo.png'
    extra_css = """
        @font-face {font-family: "serif1";src:url(res:///opt/sony/ebook/FONT/tt0011m_.ttf)}
        @font-face {font-family: "sans1";src:url(res:///opt/sony/ebook/FONT/tt0003m_.ttf)}
        body{font-family: Arial,"Segoe UI","Trebuchet MS",Helvetica,sans1,sans-serif}
        .dropcap{font-family: Georgia,Times,serif1,serif; display:inline}
        .dropcap:first-letter{display: inline; font-size: xx-large; font-weight: bold}
        .contentheading{color: gray; font-size: x-large}
        .article-meta, .createdby{color: red}
        img{margin-top:0.5em; margin-bottom: 0.7em; display: block}
    """

    conversion_options = {
        'comment': description,
        'tags': category,
        'publisher': publisher,
        'language': language,
    }

    # Replace U+0110 (LATIN CAPITAL LETTER D WITH STROKE) with U+00D0
    # (LATIN CAPITAL LETTER ETH) in the downloaded markup.
    preprocess_regexps = [(re.compile(u'\u0110'), lambda match: u'\u00D0')]

    def get_browser(self):
        """Return the browser used for downloads, logged in when possible.

        The index page is always opened. The form named ``login`` is filled
        in and submitted only when both ``self.username`` and
        ``self.password`` are set.
        """
        # get_browser is an instance method of the base class, so calling it
        # through the class requires passing ``self`` explicitly.
        br = BasicNewsRecipe.get_browser(self)
        br.open(self.INDEX)
        if self.username is not None and self.password is not None:
            br.select_form(name='login')
            br['username'] = self.username
            br['passwd'] = self.password
            br.submit()
        return br

    keep_only_tags = [
        dict(attrs={'class': ['contentheading', 'article-meta', 'article-content']})
    ]
    remove_tags_after = dict(attrs={'class': 'extravote-container'})
    remove_tags = [
        dict(name=['object', 'link', 'iframe', 'meta', 'base']),
        dict(attrs={'class': 'extravote-container'}),
    ]
    remove_attributes = ['border', 'background', 'height', 'width', 'align', 'valign', 'lang']

    feeds = [
        (u'Naslovna', u'http://www.standard.rs/index.php?format=feed&type=rss'),
        (u'Politika', u'http://www.standard.rs/vesti/36-politika.html?format=feed&type=rss'),
        (u'Cvijanovic preporucuje', u'http://www.standard.rs/-cvijanovi-vam-preporuuje.html?format=feed&type=rss'),
        (u'Kolumne', u'http://www.standard.rs/vesti/49-kolumne.html?format=feed&type=rss'),
        (u'Kultura', u'http://www.standard.rs/vesti/40-kultura.html?format=feed&type=rss'),
        (u'Lifestyle', u'http://www.standard.rs/vesti/39-lifestyle.html?format=feed&type=rss'),
        (u'Svet', u'http://www.standard.rs/vesti/41-svet.html?format=feed&type=rss'),
        (u'Ekonomija', u'http://www.standard.rs/vesti/37-ekonomija.html?format=feed&type=rss'),
        (u'Sport', u'http://www.standard.rs/vesti/38-sport.html?format=feed&type=rss'),
    ]

    def preprocess_html(self, soup):
        """Clean up the parsed article markup and return the same soup.

        Steps, in order:
          1. drop every inline ``style`` attribute;
          2. remove ``div`` elements that have no children;
          3. unwrap links: text-only links become plain text, links that
             contain an image become attribute-less ``div`` elements;
          4. give every ``img`` lacking an ``alt`` attribute the value
             ``'image'``.
        """
        for item in soup.findAll(style=True):
            del item['style']

        for item in soup.findAll('div'):
            if not item.contents:
                item.extract()

        for item in soup.findAll('a'):
            limg = item.find('img')
            if item.string is not None:
                # The link has a single string child: keep just that text.
                text = item.string
                item.replaceWith(text)
            elif limg:
                # Keep the image but drop the hyperlink around it.
                item.name = 'div'
                item.attrs = []
            else:
                text = self.tag_to_string(item)
                item.replaceWith(text)

        for item in soup.findAll('img'):
            if not item.has_key('alt'):
                item['alt'] = 'image'
        return soup