home
***
CD-ROM
|
disk
|
FTP
|
other
***
search
/
Chip 2011 November
/
CHIP_2011_11.iso
/
Programy
/
Narzedzia
/
Calibre
/
calibre-0.8.18.msi
/
file_280
/
dna.recipe
< prev
next >
Wrap
Text File
|
2011-09-09
|
2KB
|
49 lines
'''
dnaindia.com
'''
import re
from calibre.web.feeds.news import BasicNewsRecipe
class DNAIndia(BasicNewsRecipe):
    '''
    Calibre news recipe for dnaindia.com (DNA India).

    Declares the RSS feeds to read, the tag specs used to clean up each
    downloaded page, a mapping from article URLs to the site's print page
    (print_version) and a final markup clean-up pass (postprocess_html).
    '''

    title = 'DNA India'
    description = 'Mumbai news, India news, World news, breaking news'
    __author__ = 'Kovid Goyal'
    language = 'en_IN'
    encoding = 'cp1252'

    # (section title, RSS feed URL) pairs.
    feeds = [
        ('Top News', 'http://www.dnaindia.com/syndication/rss_topnews.xml'),
        ('Popular News', 'http://www.dnaindia.com/syndication/rss_popular.xml'),
        ('Recent Columns', 'http://www.dnaindia.com/syndication/rss_column.xml'),
        ('Mumbai', 'http://www.dnaindia.com/syndication/rss,catid-1.xml'),
        ('India', 'http://www.dnaindia.com/syndication/rss,catid-2.xml'),
        ('World', 'http://www.dnaindia.com/syndication/rss,catid-9.xml'),
        ('Money', 'http://www.dnaindia.com/syndication/rss,catid-4.xml'),
        ('Sports', 'http://www.dnaindia.com/syndication/rss,catid-6.xml'),
        ('After Hours', 'http://www.dnaindia.com/syndication/rss,catid-7.xml'),
        ('Digital Life', 'http://www.dnaindia.com/syndication/rss,catid-1089741.xml'),
    ]

    # Tag specs matched by id / class for elements to strip from each page.
    remove_tags = [
        {'id': ['footer', 'lhs-col']},
        {'class': ['bottom', 'categoryHead', 'article_tools']},
    ]
    # BasicNewsRecipe documents keep_only_tags as a list of tag specs (same
    # format as remove_tags), so the single spec is wrapped in a list.
    keep_only_tags = [dict(id='middle-col')]
    remove_tags_after = [dict(attrs={'id': 'story'})]
    remove_attributes = ['style']
    no_stylesheets = True

    def print_version(self, url):
        '''
        Return the print-page URL for the article at url.

        The numeric id following "newsid=" in url is appended to the
        dnaprint.asp address. When url carries no such id it is returned
        unchanged.
        '''
        match = re.search(r'newsid=(\d+)', url)
        if not match:
            # No article id to build a print URL from; fall back to the
            # address we were given.
            return url
        return 'http://www.dnaindia.com/dnaprint.asp?newsid=' + match.group(1)

    def postprocess_html(self, soup, first_fetch):
        '''
        Flatten table markup and drop the 3dsyndication.com link block.

        Every table, tr and td tag in soup is renamed to div. If a tag whose
        href is http://www.3dsyndication.com/ is found, its parent element is
        removed from the tree. first_fetch is not used here. Returns soup.
        '''
        for tag in soup.findAll(['table', 'tr', 'td']):
            tag.name = 'div'

        link = soup.find(href='http://www.3dsyndication.com/')
        if link is not None:
            # Remove the whole element containing the link, not just the
            # anchor itself.
            link.parent.extract()
        return soup