home
***
CD-ROM
|
disk
|
FTP
|
other
***
search
/
PC World 2008 September
/
PCWorld_2008-09_cd.bin
/
komunikace
/
kmeleon
/
K-Meleon1.1.3en-US.exe
/
chrome
/
newsfox.jar
/
content
/
newsfox
/
parser.js
< prev
next >
Wrap
Text File
|
2007-10-24
|
19KB
|
544 lines
/* ***** BEGIN LICENSE BLOCK *****
* Version: MPL 1.1/GPL 2.0/LGPL 2.1
*
* The contents of this file are subject to the Mozilla Public License Version
* 1.1 (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
* http://www.mozilla.org/MPL/
*
* Software distributed under the License is distributed on an "AS IS" basis,
* WITHOUT WARRANTY OF ANY KIND, either express or implied. See the License
* for the specific language governing rights and limitations under the
* License.
*
* The Original Code is Newsfox.
*
* The Initial Developer of the Original Code is
* Ron Pruitt <wa84it@gmail.com>.
* Portions created by the Initial Developer are Copyright (C) 2007
* the Initial Developer. All Rights Reserved.
*
* Contributor(s):
* Andy Frank <andy@andyfrank.com>
*
* Alternatively, the contents of this file may be used under the terms of
* either the GNU General Public License Version 2 or later (the "GPL"), or
* the GNU Lesser General Public License Version 2.1 or later (the "LGPL"),
* in which case the provisions of the GPL or the LGPL are applicable instead
* of those above. If you wish to allow use of your version of this file only
* under the terms of either the GPL or the LGPL, and not to allow others to
* use your version of this file under the terms of the MPL, indicate your
* decision by deleting the provisions above and replace them with the notice
* and other provisions required by the GPL or the LGPL. If you do not delete
* the provisions above, a recipient may use your version of this file under
* the terms of any one of the MPL, the GPL or the LGPL.
*
* ***** END LICENSE BLOCK ***** */
// Durations in milliseconds.
const HRS12 = 12 * 60 * 60 * 1000;
const YRS10 = 10 * 365.25 * 24 * 60 * 60 * 1000; // approximate ten years

// Sentinel timestamps, spaced 12 hours apart at and below the epoch.
// displayDate() treats anything later than TOP_NO_DATE as a real date and
// maps the bands below it to the "no date", "invalid date" and "future date"
// strings; each TOP_* value is the upper edge of its band.
const DATEBASE = -1 * HRS12;
const TOP_NO_DATE = new Date(DATEBASE + 1 * HRS12);
const NO_DATE = new Date(DATEBASE);
const TOP_INVALID_DATE = new Date(DATEBASE - 1 * HRS12);
const INVALID_DATE = new Date(DATEBASE - 2 * HRS12);
const TOP_FUTURE_DATE = new Date(DATEBASE - 3 * HRS12);

// Element/attribute names indexed by feed type as assigned in Parser2:
// 0 = "rdf" root, 1 = "rss" root, 2 = "feed" (Atom) root.
const CHANNEL_NAME = [
  "channel", "channel", "feed"
];
const ENTRY_NAME = [
  "item", "item", "entry"
];
const ID_NAME = [
  "guid", "guid", "id"
];
const CONTENT_NAME = [
  "description", "description", "content"
];
const DATE_NAME = [
  "date", "pubDate", "updated"
];
const CATEGORY_NAME = [
  "subject", "category", "category"
];
// Fallback date element names. RSS: <rc:date>, Atom 0.3
const DATE_NAME2 = [
  "", "date", "issued"
];
const HREF_NAME = [
  "url", "url", "href"
];

// Parallel lists used by fixLinks(): tag name and the URL-bearing attribute
// of that tag.
const TAG_NAME = [
  "a", "img", "area"
];
const ATTR_NAME = [
  "href", "src", "href"
];
/**
 * Feed parser. Constructing it inspects the root element of the document,
 * picks the feed type (0 = "rdf", 1 = "rss", 2 = "feed") and fills in
 * this.title, this.link and this.items (an array of Article objects).
 *
 * @param xml      parsed feed document
 * @param baseUrl  URL used as the starting base for resolving links
 * @throws ERROR_INVALID_FEED_URL when the root element is "parsererror",
 *         or ERROR_UNKNOWN_FEED_FORMAT + root name for any other
 *         unrecognized root element
 */
function Parser2(xml,baseUrl)
{
  this.title = null;
  this.link = null;
  this.items = new Array();

  /**
   * Walks the feed and populates this.title, this.link and this.items.
   *
   * @param xml      parsed feed document
   * @param type     feed type index (0, 1 or 2) into the *_NAME tables
   * @param baseUrl  URL used as the starting base for resolving links
   */
  this.parse = function(xml,type,baseUrl)
  {
    var channel = xml.getElementsByTagName(CHANNEL_NAME[type]);

    // BASE
    var baseuri = adjustBase(null,baseUrl);
    baseuri = adjustBase(baseuri,"/");
    if (type == 2) baseuri = getBaseURI(channel[0],baseuri);

    // TITLE
    var title = channel[0].getElementsByTagName("title");
    if (title.length > 0) this.title = getText(title[0]);

    // HOMEPAGE
    var uri = getLink(channel[0],baseuri,type);
    if (uri) this.link = uri.resolve("");

    // ITEMS: for type 0 the entries are searched for in the whole document
    // rather than inside the channel element
    var now = new Date();
    var itemContainer = (type == 0) ? xml : channel[0];
    var items = itemContainer.getElementsByTagName(ENTRY_NAME[type]);
    var body, idate, enc, j;
    for (var i=0; i<items.length; i++)
    {
      // Declared locally: the previous undeclared assignment created an
      // implicit global shared by every parser instance.
      var item = new Article();

      // ITEM:BASE
      var itembase = null;
      if (type == 2) itembase = getBaseURI(items[i],baseuri);

      // ITEM:TITLE
      title = items[i].getElementsByTagName("title");
      if (title.length > 0) item.title = getXhtml(title[0]);

      // ITEM:LINK
      uri = getLink(items[i],itembase,type);
      // need spec instead of resolve to pick up # anchors in link
      if (uri) item.link = uri.spec;
      if (item.link == this.link || !item.link) item.link = NO_LINK;

      // ITEM:ID
      var id = items[i].getElementsByTagName(ID_NAME[type]);
      if (id.length > 0)
        item.id = getText(id[0]);
      else if (item.link != NO_LINK)
        item.id = item.link;
      // an id that looks like a URL stands in for a missing link
      if (item.id && item.id.substring(0,5) == "http:" && item.link == NO_LINK)
        item.link = (uri) ? uri.resolve(item.id) : item.id;

      // ITEM:BODY
      body = items[i].getElementsByTagName(CONTENT_NAME[type]);
      if (body.length > 0)
        item.body = getXhtml(fixLinks(body[0],itembase));
      if (!item.body && type == 2) // atom
      {
        body = items[i].getElementsByTagName("summary");
        if (body.length > 0)
          item.body = getText(fixLinks(body[0],itembase));
      }
      if (type < 2) // rss: an "encoded" element overrides the description
      {
        body = items[i].getElementsByTagName("encoded");
        if (body.length > 0) item.body = getText(fixLinks(body[0],null));
      }

      // ITEM:DATE
      item.date = NO_DATE;
      idate = items[i].getElementsByTagName(DATE_NAME[type]);
      if (idate.length > 0)
      {
        if (type != 1)
          item.date = setTZDate(getText(idate[0]));
        else
          item.date = setRFCDate(getText(idate[0]));
      }
      else if (type >= 1)
      {
        // fall back to the alternative date element name
        idate = items[i].getElementsByTagName(DATE_NAME2[type]);
        if (idate.length > 0) item.date = setTZDate(getText(idate[0]));
      }

      // date adjustment: unless the matching "strict" option is set, a
      // missing or invalid date is replaced by the current time
      if (!gOptions.dateNoneStrict && item.date < TOP_NO_DATE
          && item.date > TOP_INVALID_DATE) item.date = now;
      if (!gOptions.dateInvalidStrict && item.date < TOP_INVALID_DATE
          && item.date > TOP_FUTURE_DATE) item.date = now;
      // more than 10 minutes ahead of now: with dateFutureStrict, step the
      // date back ten years at a time until it falls below TOP_FUTURE_DATE
      if (item.date - now > 10 * 60 * 1000 && gOptions.dateFutureStrict)
      {
        while (item.date >= TOP_FUTURE_DATE)
          item.date = new Date(item.date - YRS10);
      }

      // ITEM:CATEGORIES
      var cats = items[i].getElementsByTagName(CATEGORY_NAME[type]);
      if (cats.length == 0 && type == 1)
        cats = items[i].getElementsByTagName("subject");
      var cat = "";
      var newcat;
      for (j=0; j<cats.length; j++)
      {
        if (type < 2)
          newcat = getText(cats[j]);
        else
          newcat = cats[j].getAttribute("term");
        cat = mergeCats(cat,newcat);
      }
      item.category = cat;

      // ITEM:ENCLOSURES
      if (type < 2)
      {
        enc = items[i].getElementsByTagName("enclosure");
        for (j=0; j<enc.length; j++)
          item.enclosures.push(newEncl(enc[j],HREF_NAME[type]));
      }
      else
      {
        enc = items[i].getElementsByTagName("link");
        for (j=0; j<enc.length; j++)
          if (enc[j].hasAttribute("rel") && enc[j].getAttribute("rel").toLowerCase() == "enclosure")
            item.enclosures.push(newEncl(enc[j],HREF_NAME[type]));
      }

      this.items.push(item);
    }
  }

  // MAIN
  var root = xml.documentElement.localName.toLowerCase();
  var type;
  var errortype = ERROR_OK;
  switch (root)
  {
    case "feed":
      type = 2;
      break;
    case "rss":
      type = 1;
      break;
    case "rdf":
      type = 0;
      break;
    case "parsererror":
      errortype = ERROR_INVALID_FEED_URL;
      break;
    default:
      errortype = ERROR_UNKNOWN_FEED_FORMAT + root;
  }
  if (errortype != ERROR_OK) throw errortype;
  this.parse(xml,type,baseUrl);
}
/**
 * Works out the base URI that applies to an element.
 * Starts from `base`, applies the element's own xml:base attribute if it has
 * one, then applies the href of the first direct-child <link> whose rel is
 * "self" (case-insensitive).
 *
 * @param xml   element to inspect
 * @param base  base URI inherited from the enclosing context
 * @return the adjusted base URI (or `base` itself when nothing applies)
 */
function getBaseURI(xml,base)
{
  var result = base;
  if (xml.hasAttribute("xml:base"))
    result = adjustBase(result, xml.getAttribute("xml:base"));

  var candidates = xml.getElementsByTagName("link");
  var idx = 0;
  while (idx < candidates.length)
  {
    var link = candidates[idx++];
    // only links that are immediate children of this element count
    if (link.parentNode != xml) continue;
    if (!link.hasAttribute("rel")) continue;
    if (link.getAttribute("rel").toLowerCase() != "self") continue;
    return adjustBase(result, link.getAttribute("href"));
  }
  return result;
}
/**
 * Builds a URI object for `url` through the nsIIOService, using `baseuri`
 * as the base passed to newURI().
 *
 * @param baseuri  base URI object, or null
 * @param url      URL string to turn into a URI
 * @return whatever nsIIOService.newURI returns for (url, null, baseuri)
 */
function adjustBase(baseuri,url)
{
  var ioServiceClass = Components.classes['@mozilla.org/network/io-service;1'];
  var ioService = ioServiceClass.getService(Components.interfaces.nsIIOService);
  var uri = ioService.newURI(url, null, baseuri);
  return uri;
}
/**
 * Finds the link of a channel or entry and returns it as a URI object.
 *
 * For type 2 the link is the href of the first direct-child <link> that
 * either has no rel attribute or has rel="alternate" (case-insensitive).
 * For the other types it is the text of the first <link> descendant,
 * provided that element is a direct child of `xml`.
 *
 * @param xml      channel or entry element
 * @param baseuri  base URI used to resolve the link (may be null)
 * @param type     feed type index (0, 1 or 2)
 * @return URI object from adjustBase(), or null when no usable link exists
 */
function getLink(xml,baseuri,type)
{
  var links = xml.getElementsByTagName("link");
  if (links.length == 0) return null;

  var url = null;
  if (type != 2)
  {
    if (links[0].parentNode != xml) return null;
    url = getText(links[0]);
  }
  else
  {
    for (var i=0; i<links.length; i++)
      if (links[i].parentNode == xml && (!links[i].hasAttribute("rel") || links[i].getAttribute("rel").toLowerCase() == "alternate"))
      {
        url = links[i].getAttribute("href");
        break;
      }
  }
  // No matching link (e.g. an Atom entry carrying only rel="self" or
  // rel="enclosure" links): report "no link" instead of handing an
  // undefined URL to adjustBase().
  if (url == null) return null;
  return adjustBase(baseuri,url);
}
/**
 * Converts an RFC 822 style date string into a Date.
 * Date.parse is tried first; when it cannot parse the string the text is
 * handed to rescueRFCDate() for repair.
 *
 * @param rfcDate  date string taken from the feed
 * @return the parsed Date, or whatever rescueRFCDate() returns
 */
function setRFCDate(rfcDate)
{
  var millis = Date.parse(rfcDate);
  // Test the numeric value rather than comparing the Date against the
  // string "Invalid Date", which relies on how the engine prints it.
  if (isNaN(millis)) return rescueRFCDate(rfcDate);
  return new Date(millis);
}
/**
 * Converts a W3CDTF / ISO 8601 style timestamp ("YYYY-MM-DDThh:mm:ssTZD")
 * into a Date.
 *
 * Accepted leniently: a missing seconds (or minutes) field, a missing zone
 * designator (treated as UTC), a date with no time part (treated as
 * midnight UTC) and zone offsets written without a colon ("+0200", "+02").
 *
 * @param isoDate  timestamp string taken from the feed
 * @return the parsed Date, or INVALID_DATE when the text cannot be parsed
 */
function setTZDate(isoDate)
{
  try
  {
    var dateTime = isoDate.split("T");
    var ymd = dateTime[0].split("-");

    // A date-only value has no "T"; use an empty time part instead of failing.
    var timePart = (dateTime.length > 1) ? dateTime[1] : "";
    var zone = /[Z+-]/.exec(timePart);
    var clock = zone ? timePart.substring(0, zone.index) : timePart;

    var hms = clock.split(":");
    for (var i=hms.length; i<3; i++) hms[i] = 0; // hms.length<3 illegal
    var utc = Date.UTC(ymd[0],ymd[1]-1,ymd[2],hms[0],hms[1],hms[2]);

    // A local time east of UTC ("+") is ahead of UTC, so its offset is
    // subtracted; a "-" offset is added. "Z" needs no adjustment.
    var mult = 0;
    if (zone && zone[0] == "+") mult = -1;
    else if (zone && zone[0] == "-") mult = 1;
    if (mult != 0)
    {
      var offset = timePart.substring(zone.index + 1);
      var hm = (offset.indexOf(":") > -1)
        ? offset.split(":")
        : [ offset.substring(0,2), offset.substring(2,4) ];
      // multiply since hm not integers
      utc = utc + mult*1000*(hm[0]*3600+hm[1]*60);
    }

    if (isNaN(utc)) return INVALID_DATE;
    return new Date(utc);
  }
  catch(e) { return INVALID_DATE; }
}
/**
 * Prepares the links inside a content node for display.
 * Every <a> with an href gets target="_blank". When a base URI is supplied,
 * the URL attribute of each tag listed in TAG_NAME (attribute taken from the
 * same position in ATTR_NAME) is replaced by baseuri.resolve() of its value.
 *
 * @param node     content element; modified in place
 * @param baseuri  URI object with a resolve() method, or null/empty to skip
 *                 URL resolution
 * @return the same node that was passed in
 */
function fixLinks(node, baseuri)
{
  var n;
  var anchors = node.getElementsByTagName("a");
  for (n = 0; n < anchors.length; n++)
  {
    var anchor = anchors[n];
    if (anchor.hasAttribute("href")) anchor.setAttribute("target", "_blank");
  }

  if (baseuri)
  {
    for (var t = 0; t < TAG_NAME.length; t++)
    {
      var attr = ATTR_NAME[t];
      var elems = node.getElementsByTagName(TAG_NAME[t]);
      for (n = 0; n < elems.length; n++)
      {
        if (!elems[n].hasAttribute(attr)) continue;
        var raw = elems[n].getAttribute(attr);
        elems[n].setAttribute(attr, baseuri.resolve(raw));
      }
    }
  }
  return node;
}
/**
 * Extracts the content of a title/content element as a string.
 * When the element's type attribute is "xhtml", the child <div> is
 * serialized, its outer div is turned into a span by changeDivToSpan() and
 * the result is wrapped in <xhtml>...</xhtml>. Any other type yields the
 * element's plain text via getText().
 *
 * @param node  element to read
 * @return string content
 */
function getXhtml(node)
{
  if (node.getAttribute("type") != "xhtml") return getText(node);

  var serializer = new XMLSerializer();
  var markup = "";
  // have to watch out for space before the atom <div>, can only be one <div>
  // (if several div children are present, the last one is kept)
  var children = node.childNodes;
  for (var k = 0; k < children.length; k++)
  {
    var child = children[k];
    if (child.localName == "div") markup = serializer.serializeToString(child);
  }
  // div can't be part of content, need to retain namespaces
  markup = changeDivToSpan(markup);
  return "<xhtml>" + markup.trim() + "</xhtml>";
}
/**
 * Renames the outermost element of a serialized <div> (or <prefix:div>) to
 * span, leaving everything inside it untouched.
 *
 * The string is changed only when it both starts with a div start tag and
 * ends with "div>"; anything else is returned as is, so the opening and
 * closing tags can never end up mismatched.
 *
 * @param xml  serialized markup
 * @return the markup with its outer div turned into a span, or the input
 *         unchanged
 */
function changeDivToSpan(xml)
{
  var END = "div>";
  var endAt = xml.length - END.length;
  if (endAt < 0 || xml.lastIndexOf(END) != endAt) return xml;

  var result;
  if (xml.indexOf("<div") == 0)
    result = xml.replace("<div","<span");
  else if (/^<[^\s<>\/:]+:div/.test(xml))
    // namespace-prefixed form: the first ":div" is the one in the start tag
    result = xml.replace(":div",":span");
  else
    return xml;

  return result.replace(/div>$/,"span>");
}
/**
 * Collects the text of a node: the values of all text and CDATA nodes
 * visited by a tree walker rooted at `node`, concatenated in walk order and
 * trimmed.
 *
 * @param node  DOM node to read
 * @return trimmed text content
 */
function getText(node)
{
  var whatToShow = NodeFilter.SHOW_CDATA_SECTION | NodeFilter.SHOW_TEXT;
  var walker = node.ownerDocument.createTreeWalker(node, whatToShow, null, false);
  var text = "";
  for (var more = walker.nextNode(); more; more = walker.nextNode())
    text += walker.currentNode.nodeValue;
  return text.trim();
}
/**
 * Merges a category string into an accumulated "/"-separated category list.
 * `newcat` is split on "/" and each part that is not already present in
 * `cat` is appended. Empty parts and duplicate slashes are dropped.
 *
 * @param cat     categories collected so far, "/"-separated ("" for none)
 * @param newcat  category text to merge in; may itself contain "/" parts.
 *                null/undefined (e.g. a missing attribute) is treated as
 *                empty instead of raising a TypeError.
 * @return the merged list without leading or trailing "/"
 */
function mergeCats(cat,newcat)
{
  var current = (cat == null) ? "" : cat;
  var incoming = (newcat == null) ? "" : String(newcat);

  // Wrap in slashes so that whole-name matches can be found with indexOf.
  var merged = "/" + current + "/";
  var parts = incoming.split("/");
  for (var i=0; i<parts.length; i++)
  {
    var wrapped = "/" + parts[i] + "/";
    if (merged.indexOf(wrapped) == -1) merged += wrapped;
  }

  return merged
    .replace(/\/{2,}/g, "/")
    .replace(/^\//, "")
    .replace(/\/$/, "");
}
/**
 * Builds an Enclosure from an enclosure-bearing element.
 *
 * @param enc       element describing the enclosure
 * @param hrefname  name of the attribute holding the enclosure URL
 * @return a new Enclosure whose url, type and length are the values of the
 *         hrefname, "type" and "length" attributes respectively
 */
function newEncl(enc,hrefname)
{
  var result = new Enclosure();
  // [property on the Enclosure, attribute to read it from]
  var mapping = [
    [ "url", hrefname ],
    [ "type", "type" ],
    [ "length", "length" ]
  ];
  for (var k = 0; k < mapping.length; k++)
    result[mapping[k][0]] = enc.getAttribute(mapping[k][1]);
  return result;
}
/**
 * Formats an article date for display.
 *
 * Dates later than TOP_NO_DATE are real dates and are formatted according
 * to `style`:
 *   2 - date.toLocaleString()
 *   0 - "Y-M-D hh:mm" in local time, with the date part omitted when it is
 *       today's date
 *   anything else - short date / time without seconds from
 *       nsIScriptableDateFormat
 * Dates at or below TOP_NO_DATE are sentinels and yield the FUTURE_DATE,
 * INVALID_DATE or NO_DATE string from the string bundle.
 *
 * @param date   Date to format
 * @param style  display style selector
 * @return display string
 */
function displayDate(date, style)
{
  const NF_SB = document.getElementById("newsfox-string-bundle");

  // Sentinel values first.
  if (!(date > TOP_NO_DATE))
  {
    var key = 'NO_DATE';
    if (date <= TOP_FUTURE_DATE) key = 'FUTURE_DATE';
    else if (date <= TOP_INVALID_DATE) key = 'INVALID_DATE';
    return NF_SB.getString(key);
  }

  if (style == 2) return date.toLocaleString();

  if (style == 0)
  {
    var twoDigits = function(n) { return (n < 10) ? "0" + n : n; };
    var dayStamp = function(d)
    {
      return d.getFullYear() + "-" + (d.getMonth()+1) + "-" + d.getDate() + " ";
    };
    var clock = twoDigits(date.getHours()) + ":" + twoDigits(date.getMinutes());
    var day = dayStamp(date);
    if (day == dayStamp(new Date())) day = "";
    return (day + clock);
  }

  // style == 1
  var sdf = Components.classes["@mozilla.org/intl/scriptabledateformat;1"]
    .createInstance(Components.interfaces.nsIScriptableDateFormat);
  return sdf.FormatDateTime("", sdf.dateFormatShort,
    sdf.timeFormatNoSeconds, date.getFullYear(), date.getMonth()+1,
    date.getDate(), date.getHours(), date.getMinutes(), date.getSeconds());
}
/**
 * Get a human readable summary of error. (from Andy Frank)
 *
 * The first character of `code` selects the message; for an unknown feed
 * format the remainder of the code is appended after ": ".
 *
 * @param code  error code string (an ERROR_* value, optionally followed by
 *              detail text)
 * @return localized summary from the "newsfox-string-bundle" string bundle
 */
function getErrorSummary(code)
{
  var bundle = document.getElementById("newsfox-string-bundle");
  var okText = bundle.getString('feed_ok');
  var invalidText = bundle.getString('feed_invalid');
  var unknownText = bundle.getString('feed_format_unknown');
  var otherText = bundle.getString('feed_other_error');

  var kind = code.substring(0,1);
  if (kind === ERROR_OK) return okText;
  if (kind === ERROR_INVALID_FEED_URL) return invalidText;
  if (kind === ERROR_UNKNOWN_FEED_FORMAT) return unknownText + ": " + code.substring(1);
  return otherText;
}
/**
 * Get possible remedies for this error. (from Andy Frank)
 *
 * The first character of `code` selects the remedy text. ERROR_OK has no
 * remedy (empty string); an unrecognized code is returned unchanged.
 *
 * @param code  error code string (an ERROR_* value, optionally followed by
 *              detail text)
 * @return remedy text
 */
function getErrorRemedies(code)
{
  // TODO - break out into HTML referenced by ID
  var bundle = document.getElementById("newsfox-string-bundle");
  var invalidRemedy = bundle.getString('remedy_invalid');
  var unknownRemedy = bundle.getString('remedy_format_unknown');

  var kind = code.substring(0,1);
  if (kind === ERROR_OK) return "";
  if (kind === ERROR_INVALID_FEED_URL) return invalidRemedy;
  if (kind === ERROR_UNKNOWN_FEED_FORMAT) return unknownRemedy;
  return code;
}
// Numeric offsets substituted for time zone abbreviations by
// rescueRFCDate(). From Bernhard Schelling bug#17681
const RESCUE_TZ_OFFSETS = {
  ACDT: '+1030', ACST: '+0930', ADT: '-0300', AEDT: '+1100',
  AEST: '+1000', AHST: '-1000', AKDT: '-0800', AKST: '-0900',
  AST: '-0400', AT: '-0200', AWDT: '+0900', AWST: '+0800',
  BST: '+0100', BT: '+0300', CAT: '-1000', CCT: '+0800',
  CEDT: '+0200', CEST: '+0200', CET: '+0100', CXT: '+0700',
  EADT: '+1100', EAST: '+1000', EEDT: '+0300', EEST: '+0300',
  EET: '+0200', FST: '+0200', FWT: '+0100', GST: '+1000',
  HAA: '-0300', HAC: '-0500', HADT: '-0900', HAE: '-0400',
  HAP: '-0700', HAR: '-0600', HAST: '-1000', HAT: '-0230',
  HAY: '-0800', HDT: '-0900', HNA: '-0400', HNC: '-0600',
  HNE: '-0500', HNP: '-0800', HNR: '-0700', HNT: '-0330',
  HNY: '-0900', HST: '-1000', IDLE: '+1200', IDLW: '-1200',
  IST: '+0100', JST: '+0900', MEST: '+0200', MESZ: '+0200',
  MET: '+0100', MEWT: '+0100', MEZ: '+0100', NDT: '-0230',
  NFT: '+1130', NST: '-0330', NT: '-1100', NZDT: '+1300',
  NZST: '+1200', NZT: '+1200', SST: '+0200', SWT: '+0100',
  UTC: '-0000', WADT: '+0800', WAT: '-0100', WEDT: '+0100',
  WEST: '+0100', WET: '-0000', WST: '+0800', YDT: '-0800',
  YST: '-0900', ZP4: '+0400', ZP5: '+0500', ZP6: '+0600'
};

/**
 * Second attempt at parsing an RFC 822 style date that Date.parse rejected.
 *
 * Repairs applied before parsing again:
 *  - fields are split on any run of whitespace, so a padded day such as
 *    "Wed,  3 Oct 2007 ..." keeps its fields aligned;
 *  - the leading day-of-week is optional;
 *  - a two-digit year becomes 20xx when below 70, otherwise 19xx;
 *  - a non-numeric time zone is replaced by its numeric offset, either from
 *    RESCUE_TZ_OFFSETS or, for a single letter other than J, from the
 *    military zone letters (A..M = +1..+12, N..Y = -1..-12, Z = 0).
 *
 * @param rfcDate  date string taken from the feed
 * @return the parsed Date, or INVALID_DATE when it still cannot be parsed
 */
function rescueRFCDate(rfcDate)
{
  try
  {
    var dateArray = rfcDate.replace(/^\s+|\s+$/g, "").split(/\s+/);

    // Without a day-of-week the first field is the numeric day and every
    // later field sits one position earlier.
    var shift = /^\d+$/.test(dateArray[0]) ? 1 : 0;
    var yrIdx = 3 - shift;
    var tzIdx = 5 - shift;
    if (dateArray.length <= yrIdx) return INVALID_DATE;

    var yr = dateArray[yrIdx];
    if (yr.length == 2) yr = yr < 70 ? "20" + yr: "19" + yr;
    dateArray[yrIdx] = yr;

    // global isNaN on purpose: a numeric offset such as "+0100" coerces to a
    // number and is left alone
    if (dateArray.length == tzIdx + 1 && isNaN(dateArray[tzIdx]))
    {
      var timeZone = String(dateArray[tzIdx]).toUpperCase();
      if (Object.prototype.hasOwnProperty.call(RESCUE_TZ_OFFSETS, timeZone))
        dateArray[tzIdx] = RESCUE_TZ_OFFSETS[timeZone];
      //Support for single letter military time zones
      else if (/^[A-IK-Z]$/.test(timeZone))
      {
        var code = timeZone.charCodeAt(0);
        var hours;
        if (code == 90) hours = 0;              // Z
        else if (code < 74) hours = code - 64;  // A..I -> +1..+9
        else if (code < 78) hours = code - 65;  // K..M -> +10..+12
        else hours = 77 - code;                 // N..Y -> -1..-12
        var abs = Math.abs(hours);
        dateArray[tzIdx] = (hours < 0 ? '-' : '+') + (abs < 10 ? '0' : '') + abs + '00';
      }
    }

    var millis = Date.parse(dateArray.join(" "));
    if (isNaN(millis)) return INVALID_DATE;
    return new Date(millis);
  }
  catch(e) { return INVALID_DATE; }
}