# paperli_topic.recipe
__license__ = 'GPL v3'
__copyright__ = '2010, Hiroshi Miura <miurahr@linux.com>'
'''
paperli
'''
from calibre.web.feeds.news import BasicNewsRecipe
from calibre import strftime
class paperli_topics(BasicNewsRecipe):

    # Customize this recipe and change paperli_tag and title below to
    # download news on your favorite tag
    paperli_tag = 'climate'
    title       = u'The #climate Daily - paperli'
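    # For example, to follow a different tag one might set (illustrative
    # values only, not part of the original recipe):
    #   paperli_tag = 'python'
    #   title       = u'The #python Daily - paperli'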
    #-------------------------------------------------------------
    __author__            = 'Hiroshi Miura'
    oldest_article        = 7
    max_articles_per_feed = 100
    description           = 'paper.li page about ' + paperli_tag
    publisher             = 'paper.li'
    category              = 'paper.li'
    language              = 'en'
    encoding              = 'utf-8'
    remove_javascript     = True
    masthead_title        = u'The ' + paperli_tag + ' Daily'
    timefmt               = '[%y/%m/%d]'
    base_url              = 'http://paper.li'
    index                 = base_url + '/tag/' + paperli_tag
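
    # parse_index() builds the feed list dynamically: it scrapes the related
    # topics from the bottom navigation bar of the tag page, then collects the
    # headline link and summary text of each story on every topic page.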
    def parse_index(self):
        # get topics
        topics = []
        soup = self.index_to_soup(self.index)
        topics_lists = soup.find('div', attrs={'class': 'paper-nav-bottom'})
        for item in topics_lists.findAll('li', attrs={'class': ""}):
            itema = item.find('a', href=True)
            topics.append({'title': itema.string, 'url': itema['href']})

        # get feeds
        feeds = []
        for topic in topics:
            newsarticles = []
            soup = self.index_to_soup(''.join([self.base_url, topic['url']]))
            topstories = soup.findAll('div', attrs={'class': 'yui-u'})
            for itt in topstories:
                itema = itt.find('a', href=True, attrs={'class': 'ts'})
                if itema is not None:
                    itemd = itt.find('div', text=True, attrs={'class': 'text'})
                    newsarticles.append({
                        'title': itema.string,
                        'date': strftime(self.timefmt),
                        'url': itema['href'],
                        'description': itemd.string,
                    })
            feeds.append((topic['title'], newsarticles))

        return feeds
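
# A custom recipe like this can typically be tried out from the command line
# with calibre's ebook-convert tool, for example:
#   ebook-convert paperli_topic.recipe output.epub --test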