home *** CD-ROM | disk | FTP | other *** search
- //=========================================================================================================
- //
- // Copyright 2002 Macromedia, Inc. All rights reserved.
- //
- // Feature: Paste Fix
- // Author: JDH
- // Module: PMScanners.js
- // Purpose: HTML Scanning classes.
- // Updates:
- // 5/31/02 - Started file control
- //
- //=========================================================================================================
-
-
- // This source file contains a number of generic helper classes that parse or alter a stream of HTML using
- // the scanSourceString functionality in the Dreamweaver class. The pattern for the use of all of these
- // classes is:
- //
- // var scanner = new <scanner_name>( <args> );
- // var retVal = scanner.scan( html );
- //
- // Where the retVal is either the altered HTML (for a scanner that alters HTML), or an array (or associative
- // array) for scanners that parse the HTML looking for specific tags.
- //
- // These classes are essentially unrolled, by which I mean that, while there are more elegant multi-level
- // hierarchical solutions to the problems solved by these scanners, I took a more lightweight approach and made
- // more scanners that had lower levels of functionality, and did not descend from a large hierarchy. The
- // reason was mainly efficiency and simplicity. Only the StructureScanner class is meant to be derived from
- // during use.
-
- // Some thoughts on scanSourceString:
- //
- // 1) Only implement the methods in the class that you need to get your job done. Don't implement closeTagBegin
- // if you don't need it. scanSourceString looks for the existence of these methods at the beginning of the parse
- // and will short-circuit the call if you haven't defined a method, and that will save you some time. It also
- // makes the intention of your code more clear.
- //
- // 2) Don't return anything from the scanSourceString methods. scanSourceString isn't looking for a return value
- // so don't give it one.
- //
- // 3) I've seen a pattern in other code where you create an object with new Object and then add methods in
- // on slots using new Function. I think the code is hard to read and hard to maintain, without performance benefit.
- // I would avoid this pattern and I have not used it here.
-
-
// Returns str with every double-quote character removed. The scanners below wrap
// attribute values in double quotes when they re-emit them, so embedded ones are dropped.
function handleAttributeText( str )
{
    var withoutQuotes = str.replace( /\"/g, "" );

    return withoutQuotes;
}
-
-
- //---------------------------------------------------------------------------------------------------------
- // GetContentScanner
- //---------------------------------------------------------------------------------------------------------
-
- // The GetContentScanner class gets the contents of any tags in the array of tags
- // specified in the input array. For example, specifying: [ "p" ] and then scanning
- // this:
- //
- // <html><body><p>This</p><p>is</p><p>a</p><p>test</p></body></html>
- //
- // Would return an array like, [ "This", "is", "a", "test" ].
- //
- // Please note that this code was specifically designed to ignore directives and to get the
- // contents within directives.
- //
- // This scanner does not alter the HTML in any way.
-
// Creates a scanner that collects the text found while inside any of the named tags.
//
// tagNameArray - array of tag names (for example [ "p" ]). Names are matched
//                case-insensitively against the tags reported by scanSourceString.
function GetContentScanner( tagNameArray )
{
    this._tagNameArray = tagNameArray;
    this._findLookup = {};

    // The callbacks look tags up with tag.toLowerCase(), so the keys are stored in lower
    // case too; otherwise a name such as "P" could never match. The own-property guard
    // keeps anything added to Array.prototype from being treated as a tag name.
    for( var index in tagNameArray )
    {
        if ( Object.prototype.hasOwnProperty.call( tagNameArray, index ) )
            this._findLookup[ String( tagNameArray[ index ] ).toLowerCase() ] = 1;
    }
}

// External methods

GetContentScanner.prototype.scan = GetContentScanner_scan;

// scanSourceString specific methods

GetContentScanner.prototype.directive = GetContentScanner_directive;
GetContentScanner.prototype.text = GetContentScanner_text;
GetContentScanner.prototype.openTagBegin = GetContentScanner_openTagBegin;
GetContentScanner.prototype.closeTagBegin = GetContentScanner_closeTagBegin;

// Scans source and returns an array with every text run seen inside one of the
// requested tags. The bodies of the directives collected during the first pass are
// then scanned as well and their matches are added to the same array.
//
// Throws the string "GetContentScanner bad scan" when a callback failed.
function GetContentScanner_scan( source )
{
    this._found = [];
    this._inTag = false;
    this._directives = [];
    this._scan_error = false;

    dw.scanSourceString( source, this );

    if ( this._scan_error )
        throw( "GetContentScanner bad scan" );

    // Only the directives gathered by the first pass are rescanned. The count is taken
    // up front because rescanning may push further entries onto this._directives.
    var directiveCount = this._directives.length;
    for( var dirIndex = 0; dirIndex < directiveCount; dirIndex++ )
    {
        dw.scanSourceString( this._directives[ dirIndex ], this );

        if ( this._scan_error )
            throw( "GetContentScanner bad scan" );
    }

    return this._found;
}

// Remembers the body of a directive: one leading "<...>" marker and one trailing
// "<...>" marker are stripped and what is left is queued for the second pass in scan.
function GetContentScanner_directive( code, offset )
{
    try {

        code = code.replace( /^\<([^>]*)>/, "" );
        code = code.replace( /\<([^>]*)>$/, "" );

        this._directives.push( code );

    } catch(e) {

        // Record the failure so that scan can report it.
        this._scan_error = true;
        return false;

    }

    return true;
}

// Records the text run when it appears inside one of the requested tags.
function GetContentScanner_text( code, offset )
{
    try {

        if ( this._inTag )
            this._found.push( code );

    } catch(e) {

        this._scan_error = true;
        return false;

    }

    return true;
}

// Starts collecting text when one of the requested tags opens.
function GetContentScanner_openTagBegin( tag, offset )
{
    try {

        if ( this._findLookup[ tag.toLowerCase() ] )
            this._inTag = true;

    } catch(e) {

        this._scan_error = true;
        return false;

    }

    return true;
}

// Stops collecting text when one of the requested tags closes.
function GetContentScanner_closeTagBegin( tag, offset )
{
    try {

        if ( this._findLookup[ tag.toLowerCase() ] )
            this._inTag = false;

    } catch(e) {

        this._scan_error = true;
        return false;

    }

    return true;
}
-
-
-
-
- //---------------------------------------------------------------------------------------------------------
- // FindDirectiveScanner
- //---------------------------------------------------------------------------------------------------------
-
- // The FindDirectiveScanner class reports whether a specific directive appears in the
- // HTML. The directive text passed to the constructor is compared against the full text
- // of every directive found by the scan. For example, constructing the scanner with
- // "<!--StartFragment-->" and then scanning this:
- //
- // <html><body><!--StartFragment--><p>test</p></body></html>
- //
- // Would return true. Scanning HTML that does not contain that exact directive
- // returns false.
- //
- //
- // This scanner does not alter the HTML in any way.
-
// Creates a scanner that looks for one specific directive.
//
// directive - the directive text to look for; it is compared with == against the
//             complete text of every directive reported by scanSourceString.
function FindDirectiveScanner( directive )
{
    this._directive = directive;
}

// External methods

FindDirectiveScanner.prototype.scan = FindDirectiveScanner_scan;

// scanSourceString specific methods

FindDirectiveScanner.prototype.directive = FindDirectiveScanner_directive;

// Scans source and returns true when the directive given to the constructor was
// seen, false otherwise.
//
// Throws the string "FindDirectiveScanner bad scan" when the callback failed.
function FindDirectiveScanner_scan( source )
{
    this._found = false;
    this._scan_error = false;

    dw.scanSourceString( source, this );

    if ( this._scan_error )
        throw( "FindDirectiveScanner bad scan" );

    return this._found;
}

// Flags a match when the directive text equals the one being searched for.
function FindDirectiveScanner_directive( code, offset )
{
    try {

        if ( code == this._directive )
            this._found = true;

    } catch(e) {

        // Record the failure so that scan can report it.
        this._scan_error = true;
        return false;

    }

    return true;
}
-
-
- //---------------------------------------------------------------------------------------------------------
- // GetMetaTagsScanner
- //---------------------------------------------------------------------------------------------------------
-
- // Scans the source string for meta tags and returns an associative array
- // of name value pairs. For example, scanning this HTML:
- //
- // <html><head>
- // <meta name="key1" content="value1">
- // <meta name="key2" content="value2">
- // </head></html>
- //
- // Will return { key1: "value1", key2: "value2" }
- //
- // This scanner does not alter the HTML in any way.
-
// Creates a scanner that gathers the name/content pairs of <meta> tags.
function GetMetaTagsScanner( ) { }

// External methods

GetMetaTagsScanner.prototype.scan = GetMetaTagsScanner_scan;

// scanSourceString specific methods

GetMetaTagsScanner.prototype.openTagBegin = GetMetaTagsScanner_openTagBegin;
GetMetaTagsScanner.prototype.closeTagBegin = GetMetaTagsScanner_closeTagBegin;
GetMetaTagsScanner.prototype.attribute = GetMetaTagsScanner_attribute;

// Scans source and returns an object that maps each lower-cased meta name to its
// content value.
//
// Throws the string "GetMetaTagsScanner bad scan" when a callback failed.
function GetMetaTagsScanner_scan( source )
{
    this._found = {};
    this._inMeta = false;
    this._name = null;
    this._content = null;
    this._scan_error = false;

    dw.scanSourceString( source, this );

    if ( this._scan_error )
        throw( "GetMetaTagsScanner bad scan" );

    return this._found;
}

// Tracks whether the tag being opened is a <meta> tag and starts a fresh name/content pair.
function GetMetaTagsScanner_openTagBegin( tag, offset )
{
    try {

        // A <meta> tag has no close tag, so waiting for closeTagBegin to leave the
        // "in meta" state would let the name/content attributes of whatever tag
        // follows be recorded as meta data. Every open tag therefore decides the
        // state for the attributes that come after it.
        this._inMeta = ( tag.toLowerCase() == "meta" );
        this._name = null;
        this._content = null;

    } catch( e ) {

        this._scan_error = true;
        return false;

    }

    return true;
}

// Any close tag ends the current <meta> tag.
function GetMetaTagsScanner_closeTagBegin( tag, offset )
{
    try {

        if ( this._inMeta )
            this._inMeta = false;

    } catch( e ) {

        this._scan_error = true;
        return false;

    }

    return true;
}

// Collects the name and content attributes of the current <meta> tag, in either
// order, and stores the pair once both have been seen.
function GetMetaTagsScanner_attribute( name, code )
{
    try {

        if ( this._inMeta )
        {
            var attributeName = name.toLowerCase();

            if ( attributeName == "name" )
                this._name = code;
            if ( attributeName == "content" )
                this._content = code;

            if ( this._name != null && this._content != null )
            {
                this._found[ this._name.toLowerCase() ] = this._content;
                this._name = null;
                this._content = null;
            }
        }

    } catch( e ) {

        this._scan_error = true;
        return false;

    }

    return true;
}
-
-
-
- //---------------------------------------------------------------------------------------------------------
- // ParseSupportListsScanner
- //---------------------------------------------------------------------------------------------------------
-
- // The ParseSupportListsScanner was specifically written with Word in mind. Word has some fairly
- // interesting directive-laced HTML that sits around hierarchical list items. This does not apply to single
- // level numeric or bulleted lists, which are implemented with <LI> tags. This only applies to multi-level
- // hierarchical lists, like:
- //
- // 1. First level
- // 1.1 Sub level 1
- // 1.1.1 Something reminiscent of the military
- // 1.2 Sub level 2
- //
- // The Word format puts in a directive around supporting lists, this parser removes only the
- // sections we want from it. As an example:
- //
- // <![if !supportLists]><span style='mso-list:Ignore'>I.<span
- // style='font:7.0pt "Times New Roman"'>&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;
- // </span></span><![endif]>
- //
- // Becomes, just:
- //
- // I.
- //
- // The enclosing <P> tag has the indent level in the style attribute.
-
// Creates a scanner that rewrites HTML, dropping the markup found between an
// "<![if !supportLists]>" directive and the "<![endif]>" directive that follows it.
function ParseSupportListsScanner( ) { }

// Local methods

ParseSupportListsScanner.prototype.scan = ParseSupportListsScanner_scan;

// scanSourceString specific methods

ParseSupportListsScanner.prototype.directive = ParseSupportListsScanner_directive;
ParseSupportListsScanner.prototype.text = ParseSupportListsScanner_text;
ParseSupportListsScanner.prototype.openTagBegin = ParseSupportListsScanner_openTagBegin;
ParseSupportListsScanner.prototype.openTagEnd = ParseSupportListsScanner_openTagEnd;
ParseSupportListsScanner.prototype.closeTagBegin = ParseSupportListsScanner_closeTagBegin;
ParseSupportListsScanner.prototype.attribute = ParseSupportListsScanner_attribute;

// Scans source and returns the rewritten HTML.
//
// context - object whose createStringBuffer() returns a buffer with append() and get().
//
// Throws the string "ParseSupportListsScanner bad scan" when a callback failed.
function ParseSupportListsScanner_scan( source, context )
{
    this._sb = context.createStringBuffer();
    this._inSupportLists = false;
    this._firstItem = false;
    this._scan_error = false;

    dw.scanSourceString( source, this );

    if ( this._scan_error )
        throw( "ParseSupportListsScanner bad scan" );

    return this._sb.get();
}

// Enters the support-lists state on "<![if !supportLists]>" and leaves it on
// "<![endif]>". Directives seen inside that state are dropped, all others are copied.
function ParseSupportListsScanner_directive( code, offset )
{
    try {

        // The match is deliberately made against the directive as written
        // (case-sensitive "supportLists").
        if ( code.match( /^\<\!\[if \!supportLists\]>/ ) )
        {
            this._inSupportLists = true;
            this._firstItem = true;
        }
        else if ( ! this._inSupportLists )
        {
            this._sb.append( code );
        }
        else if ( code.match( /^\<\!\[endif\]\>/ ) )
        {
            this._inSupportLists = false;
        }

    } catch( e ) {

        this._scan_error = true;
        return false;

    }

    return true;
}

// Copies text to the output followed by a blank. Inside a support-lists section blanks
// are stripped from the text, and the first text run after the opening directive is
// replaced by a single blank.
//
// NOTE(review): the blank in the pattern and in the appended literal below may have been
// an &nbsp; entity before this file was extracted - confirm against the shipped file.
function ParseSupportListsScanner_text( code, offset )
{
    try {

        if ( this._inSupportLists )
            code = code.replace( /\ /g, "" );

        if ( this._firstItem )
        {
            this._firstItem = false;
            this._sb.append( " " );
        }
        else if ( code.length > 0 )
        {
            this._sb.append( code + " " );
        }

    } catch( e ) {

        this._scan_error = true;
        return false;

    }

    return true;
}

// Emits the start of an open tag unless inside a support-lists section.
function ParseSupportListsScanner_openTagBegin( tag, offset )
{
    try {

        if ( ! this._inSupportLists )
            this._sb.append( "<" + tag );

    } catch( e ) {

        this._scan_error = true;
        return false;

    }

    return true;
}

// Emits the end of an open tag unless inside a support-lists section.
function ParseSupportListsScanner_openTagEnd( tag, trailingFormat )
{
    try {

        if ( ! this._inSupportLists )
            this._sb.append( ">" );

    } catch( e ) {

        this._scan_error = true;
        return false;

    }

    return true;
}

// Emits a close tag unless inside a support-lists section.
function ParseSupportListsScanner_closeTagBegin( tag, offset )
{
    try {

        if ( ! this._inSupportLists )
            this._sb.append( "</" + tag + ">" );

    } catch( e ) {

        this._scan_error = true;
        return false;

    }

    return true;
}

// Emits an attribute as name="value" unless inside a support-lists section.
function ParseSupportListsScanner_attribute( name, code )
{
    try {

        if ( ! this._inSupportLists )
            this._sb.append( " " + name + "=\"" + handleAttributeText(code) + "\"" );

    } catch( e ) {

        this._scan_error = true;
        return false;

    }

    return true;
}
-
-
-
- //---------------------------------------------------------------------------------------------------------
- // RemoveConditionalsScanner
- //---------------------------------------------------------------------------------------------------------
-
- // This scanner is specific to MS products. The MS HTML output format is laced with
- // directives throughout. What this scanner does is find the directives and remove them. Additionally, if some
- // interesting tags are within the directive then it will keep just that tag and remove the surrounding directive.
- // It is important to note that there are often nested directives with MS documents.
- //
- // An example of the directives you might see is:
- //
- // <![if !vml]><span style='mso-ignore:vglayout'><table cellpadding=0 cellspacing=0>
- // <tr><td width=63 height=0></td></tr>
- // <tr><td></td>
- // <td><![endif]><![if !excel]><img width=482 height=247 src="file:some_temp_file.gif">
- // <![endif]><![if !vml]></td>
- // <td width=31></td>
- // </tr>
- // <tr><td height=8></td></tr></table></span><![endif]>
- //
- // When all we really want is:
- //
- // <img width=482 height=247 src="file:some_temp_file.gif">
- //
- // Passing allowTags = { img: 1 } will do that for you.
- //
- // One note on RemoveConditionalsScanner. The tags you specify with allowTags should be tags that have opens with
- // no closes, like <img> or <br>. The code is not built generically enough to handle open and close pairs. If that
- // is required then the class will need some redesigning.
-
// Creates a scanner that strips "<![if ...]> ... <![endif]>" sections from HTML.
//
// allowTags - lookup object keyed by lower-case tag name; the open tag (and attributes)
//             of a listed tag is kept even when it sits inside a stripped section.
function RemoveConditionalsScanner( allowTags )
{
    this._allowTags = allowTags;
}

// External methods

RemoveConditionalsScanner.prototype.scan = RemoveConditionalsScanner_scan;

// scanSourceString specific methods

RemoveConditionalsScanner.prototype.directive = RemoveConditionalsScanner_directive;
RemoveConditionalsScanner.prototype.text = RemoveConditionalsScanner_text;
RemoveConditionalsScanner.prototype.openTagBegin = RemoveConditionalsScanner_openTagBegin;
RemoveConditionalsScanner.prototype.openTagEnd = RemoveConditionalsScanner_openTagEnd;
RemoveConditionalsScanner.prototype.closeTagBegin = RemoveConditionalsScanner_closeTagBegin;
RemoveConditionalsScanner.prototype.attribute = RemoveConditionalsScanner_attribute;

// Scans source and returns the HTML with the conditional sections removed.
//
// context - object whose createStringBuffer() returns a buffer with append() and get().
//
// Throws the string "RemoveConditionalsScanner bad scan" when a callback failed.
function RemoveConditionalsScanner_scan( source, context )
{
    this._scan_error = false;
    this._exceptionTag = false;
    this._inDirective = 0;          // nesting depth of "<![if" sections
    this._sb = context.createStringBuffer();

    dw.scanSourceString( source, this );

    if ( this._scan_error )
        throw( "RemoveConditionalsScanner bad scan" );

    return this._sb.get();
}

// Tracks the nesting depth of "<![if" / "<![endif]" directives. No directive is copied
// to the output except StartFragment / EndFragment comments seen at depth zero.
function RemoveConditionalsScanner_directive( code, offset )
{
    try {

        var lowered = code.toLowerCase();
        var opensSection = lowered.match( /^\<\!\[if/ ) != null;
        var isCommentConditional = lowered.match( /^\<\!\-\-\[if/ ) != null;

        if ( opensSection )
        {
            this._inDirective++;
        }
        else if ( ! isCommentConditional )
        {
            // "<!--[if" directives fall through untouched: they are neither counted nor copied.
            if ( this._inDirective )
            {
                if ( lowered.match( /^\<\!\[endif\]/ ) )
                    this._inDirective--;
            }
            else if ( code.match( /\<\!\-\-(\s*)startfragment(\s*)\-\->/i ) || code.match( /\<\!\-\-(\s*)endfragment(\s*)\-\->/i ) )
            {
                this._sb.append( code );
            }
        }

    } catch( e ) {

        this._scan_error = true;
        return false;

    }

    return true;
}

// Copies text, followed by a blank, when outside every conditional section.
function RemoveConditionalsScanner_text( code, offset )
{
    try {

        var outsideSections = ( this._inDirective == 0 );
        if ( outsideSections )
            this._sb.append( code + " " );

    } catch( e ) {

        this._scan_error = true;
        return false;

    }

    return true;
}

// Emits the start of an open tag when outside every section, or when the tag is
// listed in allowTags (which also marks the tag as an exception until it ends).
function RemoveConditionalsScanner_openTagBegin( tag, offset )
{
    try {

        var isAllowed = this._allowTags[ tag.toLowerCase() ];
        if ( isAllowed )
            this._exceptionTag = true;

        if ( this._exceptionTag || this._inDirective == 0 )
            this._sb.append( "<" + tag );

    } catch( e ) {

        this._scan_error = true;
        return false;

    }

    return true;
}

// Emits the end of an open tag under the same rule and clears the exception mark.
function RemoveConditionalsScanner_openTagEnd( tag, trailingFormat )
{
    try {

        if ( this._exceptionTag || this._inDirective == 0 )
            this._sb.append( ">" );

        this._exceptionTag = false;

    } catch( e ) {

        this._scan_error = true;
        return false;

    }

    return true;
}

// Emits a close tag when outside every section (or while an exception tag is open).
function RemoveConditionalsScanner_closeTagBegin( tag, offset )
{
    try {

        if ( this._exceptionTag || this._inDirective == 0 )
            this._sb.append( "</" + tag + ">" );

    } catch( e ) {

        this._scan_error = true;
        return false;

    }

    return true;
}

// Emits an attribute as name="value" when the tag it belongs to is being emitted.
function RemoveConditionalsScanner_attribute( name, code )
{
    try {

        if ( this._exceptionTag || this._inDirective == 0 )
            this._sb.append( " " + name + "=\"" + handleAttributeText(code) + "\"" );

    } catch( e ) {

        this._scan_error = true;
        return false;

    }

    return true;
}
-
-
-
-
-
- //---------------------------------------------------------------------------------------------------------
- // FindClippingScanner
- //---------------------------------------------------------------------------------------------------------
-
- // FindClippingScanner is designed to find the fragment of the clipboard in the HTML and return just that
- // fragment with the minimum amount of support HTML around it. As an example:
- //
- // <html><head>.... Cruft...</head><body>... More cruft ...
- // <!--StartFragment--><p>Some small text</p><!--EndFragment-->... More cruft ...</body></html>
- //
- // Becomes:
- //
- // <html><body><!--StartFragment--><p>Some small text </p><!--EndFragment--></body></html>
- //
- // The idea is to remove all of the header information and everything in the body before and after the fragment.
- //
- // In the case where there is no <!--StartFragment--> (where we are importing the contents of a file) then
- // we look for the start and end <body> tags.
-
// Creates a scanner that extracts the clipboard fragment (or, when there is no
// StartFragment comment, the contents of the body tag) from an HTML document.
function FindClippingScanner( ) { }

// External methods

FindClippingScanner.prototype.scan = FindClippingScanner_scan;

// scanSourceString specific methods

FindClippingScanner.prototype.directive = FindClippingScanner_directive;
FindClippingScanner.prototype.text = FindClippingScanner_text;
FindClippingScanner.prototype.openTagBegin = FindClippingScanner_openTagBegin;
FindClippingScanner.prototype.openTagEnd = FindClippingScanner_openTagEnd;
FindClippingScanner.prototype.closeTagBegin = FindClippingScanner_closeTagBegin;
FindClippingScanner.prototype.attribute = FindClippingScanner_attribute;

// Scans source and returns the clipping wrapped in "<html><body>" ... "</body></html>".
//
// source  - HTML string. When it contains a StartFragment comment the clipping is what
//           lies between the StartFragment and EndFragment comments (both included);
//           otherwise it is what lies inside the body tag.
// context - object whose createStringBuffer() returns a buffer with append() and get().
//
// Throws the string "FindClippingScanner bad scan" when a callback failed.
function FindClippingScanner_scan( source, context )
{
    this._inClipping = false;
    this._firstTagException = false;
    this._findComment = false;
    this._clipTag = "";
    this._frag = "";
    this._scan_error = false;

    this._sb = context.createStringBuffer();

    if( source.match( /\<\!\-\-(\s*)startfragment(\s*)\-\->/i ) )
    {
        this._findComment = true;
        this._startText = /\<\!\-\-(\s*)startfragment(\s*)\-\->/i;
        this._endText = /\<\!\-\-(\s*)endfragment(\s*)\-\->/i;
    }
    else
        this._clipTag = "body";

    dw.scanSourceString( source, this );

    if ( this._scan_error )
        throw( "FindClippingScanner bad scan" );

    var text = this._sb.get();

    return "<html><body>" + text + "</body></html>";
}

// In comment mode, starts the clipping at the StartFragment comment and ends it at the
// EndFragment comment; both are written in a normalised form. Any other directive is
// copied while inside the clipping.
function FindClippingScanner_directive( code, offset )
{
    try {

        var testCode = code.toLowerCase();

        if( this._findComment && this._startText.exec( testCode ) )
        {
            code = "<!--StartFragment-->";
            this._inClipping = true;
        }

        if( this._findComment && this._endText.exec( testCode ) )
        {
            code = "<!--EndFragment-->";
            this._sb.append( code );
            this._inClipping = false;
        }
        else if ( this._inClipping )
            this._sb.append( code );

    } catch( e ) {

        this._scan_error = true;
        return false;

    }

    return true;
}

// Copies text, followed by a blank, while inside the clipping.
function FindClippingScanner_text( code, offset )
{
    try {

        if ( this._inClipping )
            this._sb.append( code + " " );

    } catch( e ) {

        this._scan_error = true;
        return false;

    }

    return true;
}

// Starts building an open tag while inside the clipping. In body mode the body tag
// itself starts the clipping and is flagged so that it is not copied.
function FindClippingScanner_openTagBegin( tag, offset )
{
    try {

        if ( this._inClipping )
            this._frag = "<" + tag;

        // Tag names are compared in lower case, as in the other scanners, so that
        // <BODY> is recognised as well as <body>.
        if ( this._findComment == false && this._clipTag == tag.toLowerCase() )
        {
            this._inClipping = true;
            this._firstTagException = true;
        }

    } catch( e ) {

        this._scan_error = true;
        return false;

    }

    return true;
}

// Finishes the open tag being built and writes it out, except for the body tag that
// started the clipping.
function FindClippingScanner_openTagEnd( tag, trailingFormat )
{
    try {

        if ( this._inClipping && this._firstTagException == false )
        {
            this._frag += ">";
            this._sb.append( this._frag );
        }

        this._firstTagException = false;

    } catch( e ) {

        this._scan_error = true;
        return false;

    }

    return true;
}

// In body mode the closing body tag ends the clipping; other close tags are copied
// while inside the clipping.
function FindClippingScanner_closeTagBegin( tag, offset )
{
    try {

        if ( this._findComment == false && this._clipTag == tag.toLowerCase() )
            this._inClipping = false;

        if ( this._inClipping )
            this._sb.append( "</" + tag + ">" );

    } catch( e ) {

        this._scan_error = true;
        return false;

    }

    return true;
}

// Adds an attribute to the open tag being built, except for the body tag that
// started the clipping.
function FindClippingScanner_attribute( name, code )
{
    try {

        if ( this._inClipping && this._firstTagException == false )
            this._frag += " " + name + "=\"" + handleAttributeText(code) + "\"";

    } catch( e ) {

        this._scan_error = true;
        return false;

    }

    return true;
}
-
-
-
- //---------------------------------------------------------------------------------------------------------
- // RemoveTagsScanner
- //---------------------------------------------------------------------------------------------------------
-
- // The RemoveTagsScanner removes every tag that is NOT listed in the associative
- // array passed in with the constructor; text and directives are always kept. For
- // example, to keep only the <P> tags of the following HTML:
- //
- // <html><body><p>This</p><ul><li>is<li>a<li>test</ul></body><html>
- //
- // You would pass { p: 1 } into the constructor and invoke scan with the HTML.
- // The return value would be:
- //
- // <p>This </p>is a test
-
// Creates a scanner that keeps only the tags found in tagLookup.
//
// tagLookup - lookup object keyed by lower-case tag name; a tag is written to the
//             output only when its entry is truthy.
function RemoveTagsScanner( tagLookup )
{
    this._tagLookup = tagLookup;
}

// External methods

RemoveTagsScanner.prototype.scan = RemoveTagsScanner_scan;

// scanSourceString specific methods

RemoveTagsScanner.prototype.directive = RemoveTagsScanner_directive;
RemoveTagsScanner.prototype.text = RemoveTagsScanner_text;
RemoveTagsScanner.prototype.openTagBegin = RemoveTagsScanner_openTagBegin;
RemoveTagsScanner.prototype.openTagEnd = RemoveTagsScanner_openTagEnd;
RemoveTagsScanner.prototype.closeTagBegin = RemoveTagsScanner_closeTagBegin;
RemoveTagsScanner.prototype.attribute = RemoveTagsScanner_attribute;

// Scans source and returns the HTML with the unlisted tags taken out.
//
// context - object whose createStringBuffer() returns a buffer with append() and get().
//
// Throws the string "RemoveTagsScanner bad scan" when a callback failed.
function RemoveTagsScanner_scan( source, context )
{
    this._scan_error = false;
    this._inGoodTag = true;
    this._sb = context.createStringBuffer();

    dw.scanSourceString( source, this );

    if ( ! this._scan_error )
        return this._sb.get();

    throw( "RemoveTagsScanner bad scan" );
}

// Directives are always copied through unchanged.
function RemoveTagsScanner_directive( code, offset )
{
    try {

        this._sb.append( code );
        return true;

    } catch( e ) {

        this._scan_error = true;
        return false;

    }
}

// Text is always copied through, followed by a blank.
function RemoveTagsScanner_text( code, offset )
{
    try {

        this._sb.append( code + " " );
        return true;

    } catch( e ) {

        this._scan_error = true;
        return false;

    }
}

// Decides whether the tag being opened is kept and, if so, starts writing it.
function RemoveTagsScanner_openTagBegin( tag, offset )
{
    try {

        var isListed = this._tagLookup[ tag.toLowerCase() ] ? true : false;
        this._inGoodTag = isListed;

        if ( isListed )
            this._sb.append( "<" + tag );

        return true;

    } catch( e ) {

        this._scan_error = true;
        return false;

    }
}

// Finishes a kept open tag; afterwards the state goes back to "good".
function RemoveTagsScanner_openTagEnd( tag, trailingFormat )
{
    try {

        if ( this._inGoodTag )
            this._sb.append( ">" );

        this._inGoodTag = true;
        return true;

    } catch( e ) {

        this._scan_error = true;
        return false;

    }
}

// Writes the close tag of a listed tag.
function RemoveTagsScanner_closeTagBegin( tag, offset )
{
    try {

        var isListed = this._tagLookup[ tag.toLowerCase() ];
        if ( isListed )
            this._sb.append( "</" + tag + ">" );

        return true;

    } catch( e ) {

        this._scan_error = true;
        return false;

    }
}

// Writes an attribute as name="value" when the tag it belongs to is kept.
function RemoveTagsScanner_attribute( name, code )
{
    try {

        if ( this._inGoodTag )
            this._sb.append( " " + name + "=\"" + handleAttributeText(code) + "\"" );

        return true;

    } catch( e ) {

        this._scan_error = true;
        return false;

    }
}
-
-
-
- //---------------------------------------------------------------------------------------------------------
- // RemoveAttributesScanner
- //---------------------------------------------------------------------------------------------------------
-
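- // The RemoveAttributesScanner removes attributes and style properties that are NOT
- // listed in the lookups passed to the constructor. Tags, text and directives are always
- // kept. Both lookups are keyed by lower-case tag name first; a tag without an entry
- // loses all of its attributes. For example with the HTML:
- //
- // <p class="MsoNormal" align="left">This</p>
- //
- // Passing in { p: { align: 1 } } as the attribute lookup would get you:
- //
- // <p align="left">This </p>
- //
- // The same thing applies to the properties inside a style attribute (the style attribute
- // itself must be listed in the attribute lookup). So on this html:
- //
- // <p style="mso-reject: 1; font-family: Arial">
- //
- // Passing in { p: { style: 1 } } as the attribute lookup and { p: { "font-family": 1 } }
- // as the style lookup would get you:
- //
- // <p style="font-family: Arial;">
- //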
-
// Creates a scanner that keeps only the listed attributes and style properties.
//
// attributeLookup - object keyed by lower-case tag name; each entry is a lookup object
//                   keyed by lower-case attribute name.
// styleLookup     - object keyed by lower-case tag name; each entry is a lookup object
//                   keyed by lower-case style property name.
function RemoveAttributesScanner( attributeLookup, styleLookup )
{
    this._attributeLookupMaster = attributeLookup;
    this._styleLookupMaster = styleLookup;
}

// External methods

RemoveAttributesScanner.prototype.scan = RemoveAttributesScanner_scan;

// scanSourceString specific methods

RemoveAttributesScanner.prototype.directive = RemoveAttributesScanner_directive;
RemoveAttributesScanner.prototype.text = RemoveAttributesScanner_text;
RemoveAttributesScanner.prototype.openTagBegin = RemoveAttributesScanner_openTagBegin;
RemoveAttributesScanner.prototype.openTagEnd = RemoveAttributesScanner_openTagEnd;
RemoveAttributesScanner.prototype.closeTagBegin = RemoveAttributesScanner_closeTagBegin;
RemoveAttributesScanner.prototype.attribute = RemoveAttributesScanner_attribute;

// Scans source and returns the HTML with the unlisted attributes and style
// properties taken out.
//
// context - object whose createStringBuffer() returns a buffer with append() and get().
//
// Throws the string "RemoveAttributesScanner bad scan" when a callback failed.
function RemoveAttributesScanner_scan( source, context )
{
    this._scan_error = false;
    this._tagName = null;
    this._sb = context.createStringBuffer();

    dw.scanSourceString( source, this );

    if ( ! this._scan_error )
        return this._sb.get();

    throw( "RemoveAttributesScanner bad scan" );
}

// Directives are always copied through unchanged.
function RemoveAttributesScanner_directive( code, offset )
{
    try {

        this._sb.append( code );
        return true;

    } catch( e ) {

        this._scan_error = true;
        return false;

    }
}

// Text is always copied through, followed by a blank.
function RemoveAttributesScanner_text( code, offset )
{
    try {

        this._sb.append( code + " " );
        return true;

    } catch( e ) {

        this._scan_error = true;
        return false;

    }
}

// Starts writing the tag and selects the attribute and style lookups that apply to
// it; a tag without an entry gets empty lookups.
function RemoveAttributesScanner_openTagBegin( tag, offset )
{
    try {

        var loweredTag = tag.toLowerCase();
        this._tagName = loweredTag;

        var attributesForTag = this._attributeLookupMaster[ loweredTag ];
        var stylesForTag = this._styleLookupMaster[ loweredTag ];

        this._attributeLookup = ( attributesForTag == null ) ? {} : attributesForTag;
        this._styleLookup = ( stylesForTag == null ) ? {} : stylesForTag;

        this._sb.append( "<" + tag );
        return true;

    } catch( e ) {

        this._scan_error = true;
        return false;

    }
}

// Finishes the open tag.
function RemoveAttributesScanner_openTagEnd( tag, trailingFormat )
{
    try {

        this._sb.append( ">" );
        return true;

    } catch( e ) {

        this._scan_error = true;
        return false;

    }
}

// Close tags are always copied through.
function RemoveAttributesScanner_closeTagBegin( tag, offset )
{
    try {

        this._sb.append( "</" + tag + ">" );
        return true;

    } catch( e ) {

        this._scan_error = true;
        return false;

    }
}

// Writes a listed attribute as name="value". A style attribute is first reduced to
// the listed properties; an attribute whose value ends up empty is not written.
function RemoveAttributesScanner_attribute( name, code )
{
    try {

        var loweredName = name.toLowerCase();

        if ( ! this._attributeLookup[ loweredName ] )
            return true;

        if ( loweredName == "style" )
        {
            var parsed = Utils_ParseStyle( code );

            // Same shape as before: walk the parsed properties and hand each
            // unlisted one to Utils_DeleteArrayItem, keeping whatever it returns.
            for( var property in parsed )
            {
                if ( this._styleLookup[ property.toLowerCase() ] == null )
                    parsed = Utils_DeleteArrayItem( property, parsed );
            }

            // Rebuild the style text from what is left
            code = Utils_BuildStyle( parsed );
        }

        if ( code.length > 0 )
            this._sb.append( " " + name + "=\"" + handleAttributeText(code) + "\"" );

        return true;

    } catch( e ) {

        this._scan_error = true;
        return false;

    }
}
-
-
- //---------------------------------------------------------------------------------------------------------
- // RemoveOnlyTheseTagsScanner
- //---------------------------------------------------------------------------------------------------------
-
- // The RemoveOnlyTheseTagsScanner removes only the specified tags from the given HTML stream.
- // So given this HTML:
- //
- // <html><body><p>This is <b>bold</b> and <i>italic</i></p></body></html>
- //
- // With a filter of { b: 1, i: 1 }, you would get:
- //
- // <html><body><p>This is bold and italic </p></body></html>
-
- function RemoveOnlyTheseTagsScanner( tagLookup )
- {
- this._tagLookup = tagLookup;
- }
-
- // External methods
-
- RemoveOnlyTheseTagsScanner.prototype.scan = RemoveOnlyTheseTagsScanner_scan;
-
- // scanSourceString specific methods
-
- RemoveOnlyTheseTagsScanner.prototype.directive = RemoveOnlyTheseTagsScanner_directive;
- RemoveOnlyTheseTagsScanner.prototype.text = RemoveOnlyTheseTagsScanner_text;
- RemoveOnlyTheseTagsScanner.prototype.openTagBegin = RemoveOnlyTheseTagsScanner_openTagBegin;
- RemoveOnlyTheseTagsScanner.prototype.openTagEnd = RemoveOnlyTheseTagsScanner_openTagEnd;
- RemoveOnlyTheseTagsScanner.prototype.closeTagBegin = RemoveOnlyTheseTagsScanner_closeTagBegin;
- RemoveOnlyTheseTagsScanner.prototype.attribute = RemoveOnlyTheseTagsScanner_attribute;
-
- function RemoveOnlyTheseTagsScanner_scan( source, context )
- {
- this._sb = context.createStringBuffer();
- this._inGoodTag = true;
- this._scan_error = false;
-
- dw.scanSourceString( source, this );
-
- if ( this._scan_error )
- throw( "RemoveOnlyTheseTagsScanner bad scan" );
-
- return this._sb.get();
- }
-
- function RemoveOnlyTheseTagsScanner_directive( code, offset )
- {
- try {
-
- this._sb.append( code );
-
- } catch( e ) {
-
- this._scan_error = true;
- return false;
-
- }
-
- return true;
- }
-
- function RemoveOnlyTheseTagsScanner_text( code, offset )
- {
- try {
-
- this._sb.append( code + " " );
-
- } catch( e ) {
-
- this._scan_error = true;
- return false;
-
- }
-
- return true;
- }
-
- function RemoveOnlyTheseTagsScanner_openTagBegin( tag, offset )
- {
- try {
-
- if ( this._tagLookup[ tag.toLowerCase() ] )
- this._inGoodTag = false;
- else
- this._inGoodTag = true;
-
- if ( this._inGoodTag )
- this._sb.append( "<" + tag );
-
- } catch( e ) {
-
- this._scan_error = true;
- return false;
-
- }
-
- return true;
- }
-
- function RemoveOnlyTheseTagsScanner_openTagEnd( tag, trailingFormat )
- {
- try {
-
- if ( this._inGoodTag )
- this._sb.append( ">" );
-
- this._inGoodTag = true;
-
- } catch( e ) {
-
- this._scan_error = true;
- return false;
-
- }
-
- return true;
- }
-
// scanSourceString callback for a closing tag.
//
// tag    - tag name as it appears in the source
// offset - position reported by scanSourceString (unused)
//
// Writes "</tag>" unless the lower-cased tag name has a non-null entry in _tagLookup.
//
// Returns true on success; on any exception sets _scan_error and returns false.
function RemoveOnlyTheseTagsScanner_closeTagBegin( tag, offset )
{
    var ok = true;

    try {
        var isRemoved = ( this._tagLookup[ tag.toLowerCase() ] != null );

        if ( !isRemoved )
            this._sb.append( "</" + tag + ">" );
    } catch( err ) {
        this._scan_error = true;
        ok = false;
    }

    return ok;
}
-
// scanSourceString callback for an attribute of the tag currently being opened.
//
// name - attribute name
// code - attribute value; passed through handleAttributeText before being written
//
// Writes ' name="value"' only while the current tag is being kept (_inGoodTag).
//
// Returns true on success; on any exception sets _scan_error and returns false.
function RemoveOnlyTheseTagsScanner_attribute( name, code )
{
    var ok = true;

    try {
        if ( this._inGoodTag )
        {
            var head = " " + name + "=\"";
            this._sb.append( head + handleAttributeText( code ) + "\"" );
        }
    } catch( err ) {
        this._scan_error = true;
        ok = false;
    }

    return ok;
}
-
-
-
- //---------------------------------------------------------------------------------------------------------
- // RemoveOnlyTheseAttributesScanner
- //---------------------------------------------------------------------------------------------------------
-
- // The RemoveOnlyTheseAttributesScanner removes just the specified attributes from ANY
- // tag in the provided HTML string. As an example, this HTML:
- //
- // <html><body><p class=MsoNormal>Hello</p></body></html>
- //
- // With a filter of { 'class': 1 } would result in this HTML:
- //
- // <html><body><p>Hello </p></body></html>
-
// Creates a scanner that strips the listed attributes from every tag it sees.
//
// attributeLookup - associative array keyed by lower-case attribute name. An attribute is
//                   dropped when its lower-cased name has a non-null entry in this table.
function RemoveOnlyTheseAttributesScanner( attributeLookup )
{
    this._attributeLookup = attributeLookup;
}

// Public interface

RemoveOnlyTheseAttributesScanner.prototype.scan = RemoveOnlyTheseAttributesScanner_scan;

// Callbacks handed to dw.scanSourceString

RemoveOnlyTheseAttributesScanner.prototype.attribute = RemoveOnlyTheseAttributesScanner_attribute;
RemoveOnlyTheseAttributesScanner.prototype.closeTagBegin = RemoveOnlyTheseAttributesScanner_closeTagBegin;
RemoveOnlyTheseAttributesScanner.prototype.openTagEnd = RemoveOnlyTheseAttributesScanner_openTagEnd;
RemoveOnlyTheseAttributesScanner.prototype.openTagBegin = RemoveOnlyTheseAttributesScanner_openTagBegin;
RemoveOnlyTheseAttributesScanner.prototype.text = RemoveOnlyTheseAttributesScanner_text;
RemoveOnlyTheseAttributesScanner.prototype.directive = RemoveOnlyTheseAttributesScanner_directive;

// Scans `source` and returns the rebuilt HTML.
//
// source  - the HTML handed to dw.scanSourceString
// context - object supplying createStringBuffer(); the returned buffer receives the output
//
// Throws the string "RemoveOnlyTheseAttributesScanner bad scan" when a callback flagged an error.
function RemoveOnlyTheseAttributesScanner_scan( source, context )
{
    this._sb = context.createStringBuffer();
    this._scan_error = false;

    dw.scanSourceString( source, this );

    if ( !this._scan_error )
        return this._sb.get();

    throw( "RemoveOnlyTheseAttributesScanner bad scan" );
}

// Directive callback: copies the directive to the output unchanged.
// Returns true on success; on any exception sets _scan_error and returns false.
function RemoveOnlyTheseAttributesScanner_directive( code, offset )
{
    var ok = true;

    try {
        this._sb.append( code );
    } catch( err ) {
        this._scan_error = true;
        ok = false;
    }

    return ok;
}

// Text callback: writes the text followed by a single space.
// Returns true on success; on any exception sets _scan_error and returns false.
function RemoveOnlyTheseAttributesScanner_text( code, offset )
{
    var ok = true;

    try {
        var padded = code + " ";
        this._sb.append( padded );
    } catch( err ) {
        this._scan_error = true;
        ok = false;
    }

    return ok;
}

// Opening-tag start callback: writes "<tag"; the attributes and ">" follow from other callbacks.
// Returns true on success; on any exception sets _scan_error and returns false.
function RemoveOnlyTheseAttributesScanner_openTagBegin( tag, offset )
{
    var ok = true;

    try {
        var opener = "<" + tag;
        this._sb.append( opener );
    } catch( err ) {
        this._scan_error = true;
        ok = false;
    }

    return ok;
}

// Opening-tag end callback: writes the ">" that finishes the opening tag.
// Returns true on success; on any exception sets _scan_error and returns false.
function RemoveOnlyTheseAttributesScanner_openTagEnd( tag, trailingFormat )
{
    var ok = true;

    try {
        this._sb.append( ">" );
    } catch( err ) {
        this._scan_error = true;
        ok = false;
    }

    return ok;
}

// Closing-tag callback: writes "</tag>".
// Returns true on success; on any exception sets _scan_error and returns false.
function RemoveOnlyTheseAttributesScanner_closeTagBegin( tag, offset )
{
    var ok = true;

    try {
        var closer = "</" + tag + ">";
        this._sb.append( closer );
    } catch( err ) {
        this._scan_error = true;
        ok = false;
    }

    return ok;
}

// Attribute callback: writes ' name="value"' unless the lower-cased name is listed in
// _attributeLookup. The value is passed through handleAttributeText first.
// Returns true on success; on any exception sets _scan_error and returns false.
function RemoveOnlyTheseAttributesScanner_attribute( name, code )
{
    var ok = true;

    try {
        var isFiltered = ( this._attributeLookup[ name.toLowerCase() ] != null );

        if ( !isFiltered )
        {
            var head = " " + name + "=\"";
            this._sb.append( head + handleAttributeText( code ) + "\"" );
        }
    } catch( err ) {
        this._scan_error = true;
        ok = false;
    }

    return ok;
}
-
-
-
- //---------------------------------------------------------------------------------------------------------
- // MapTagNamesScanner
- //---------------------------------------------------------------------------------------------------------
-
- // The MapTagNamesScanner is a very specialized scanner. It takes a regular expression
- // and maps any tag name that matches that expression to the given name. For example,
- // given this HTML:
- //
- // <html><body><h1>Level One</h1><h2>Level Two</h2></body></html>
- //
- // With matchCriteria = new RegExp( /h[12]/ ) and outName = "p", you would get:
- //
- // <html><body><p>Level One </p><p>Level Two </p></body></html>
- //
-
// Creates a scanner that renames every tag whose name matches a regular expression.
//
// matchCriteria - regular expression tested (via exec) against each tag name exactly as it
//                 appears in the source
// outName       - tag name written in place of every matching name
function MapTagNamesScanner( matchCriteria, outName )
{
    this._matchCriteria = matchCriteria;
    this._outName = outName;
}

// External methods

MapTagNamesScanner.prototype.scan = MapTagNamesScanner_scan;

// scanSourceString specific methods

MapTagNamesScanner.prototype.directive = MapTagNamesScanner_directive;
MapTagNamesScanner.prototype.text = MapTagNamesScanner_text;
MapTagNamesScanner.prototype.openTagBegin = MapTagNamesScanner_openTagBegin;
MapTagNamesScanner.prototype.openTagEnd = MapTagNamesScanner_openTagEnd;
MapTagNamesScanner.prototype.closeTagBegin = MapTagNamesScanner_closeTagBegin;
MapTagNamesScanner.prototype.attribute = MapTagNamesScanner_attribute;

// Scans `source` and returns the HTML with the matching tag names replaced.
//
// source  - the HTML handed to dw.scanSourceString
// context - object supplying createStringBuffer(); the returned buffer receives the output
//
// Throws the string "MapTagNamesScanner bad scan" when a callback flagged an error.
function MapTagNamesScanner_scan( source, context )
{
    this._sb = context.createStringBuffer();
    this._scan_error = false;

    dw.scanSourceString( source, this );

    if ( this._scan_error )
        throw( "MapTagNamesScanner bad scan" );

    return this._sb.get();
}

// Directive callback: copies the directive to the output unchanged.
// Returns true on success; on any exception sets _scan_error and returns false.
function MapTagNamesScanner_directive( code, offset )
{
    try {

        this._sb.append( code );

    } catch( e ) {

        this._scan_error = true;
        return false;

    }

    return true;
}

// Text callback: writes the text followed by a single space.
// Returns true on success; on any exception sets _scan_error and returns false.
function MapTagNamesScanner_text( code, offset )
{
    try {

        this._sb.append( code + " " );

    } catch( e ) {

        this._scan_error = true;
        return false;

    }

    return true;
}

// Opening-tag start callback: writes "<" followed by outName when the tag name matches
// matchCriteria, otherwise by the original tag name.
// Returns true on success; on any exception sets _scan_error and returns false.
function MapTagNamesScanner_openTagBegin( tag, offset )
{
    try {

        // exec() on a regular expression with the g or y flag resumes from lastIndex, so
        // without this reset consecutive tag names would match only every other time.
        this._matchCriteria.lastIndex = 0;

        var outTag = this._matchCriteria.exec( tag ) ? this._outName : tag;
        this._sb.append( "<" + outTag );

    } catch( e ) {

        this._scan_error = true;
        return false;

    }

    return true;
}

// Opening-tag end callback: writes the ">" that finishes the opening tag.
// Returns true on success; on any exception sets _scan_error and returns false.
function MapTagNamesScanner_openTagEnd( tag, trailingFormat )
{
    try {

        this._sb.append( ">" );

    } catch( e ) {

        this._scan_error = true;
        return false;

    }

    return true;
}

// Closing-tag callback: writes "</name>" using outName when the tag name matches
// matchCriteria, otherwise the original tag name.
// Returns true on success; on any exception sets _scan_error and returns false.
function MapTagNamesScanner_closeTagBegin( tag, offset )
{
    try {

        // Same reset as in openTagBegin: keeps the open and close decisions in agreement.
        this._matchCriteria.lastIndex = 0;

        var outTag = this._matchCriteria.exec( tag ) ? this._outName : tag;
        this._sb.append( "</" + outTag + ">" );

    } catch( e ) {

        this._scan_error = true;
        return false;

    }

    return true;
}

// Attribute callback: writes ' name="value"', with the value passed through handleAttributeText.
// Returns true on success; on any exception sets _scan_error and returns false.
function MapTagNamesScanner_attribute( name, code )
{
    try {

        this._sb.append( " " + name + "=\"" + handleAttributeText(code) + "\"" );

    } catch( e ) {

        this._scan_error = true;
        return false;

    }

    return true;
}
-
-
- //---------------------------------------------------------------------------------------------------------
- // AddStylesScanner
- //---------------------------------------------------------------------------------------------------------
-
- // The AddStylesScanner is meant to do just that, add the specified styles to the tags
- // of the type specified. For example, with the following HTML:
- //
- // <html><body><p>Level One</p></body></html>
- //
- // Passing tagName = "p" and styles = { 'margin-top':0, 'margin-bottom':0 } you would get:
- //
- // <html><body><p style="margin-top:0;margin-bottom:0">Level One </p></body></html>
- //
-
// Creates a scanner that adds a set of styles to every tag of one type.
//
// tagName - name of the tag to decorate; compared case-insensitively
// styles  - associative array of style name to value, merged into the tag's style attribute
function AddStylesScanner( tagName, styles )
{
    this._tagName = tagName.toLowerCase();
    this._styles = styles;
}

// Public interface

AddStylesScanner.prototype.scan = AddStylesScanner_scan;

// Callbacks handed to dw.scanSourceString

AddStylesScanner.prototype.attribute = AddStylesScanner_attribute;
AddStylesScanner.prototype.closeTagBegin = AddStylesScanner_closeTagBegin;
AddStylesScanner.prototype.openTagEnd = AddStylesScanner_openTagEnd;
AddStylesScanner.prototype.openTagBegin = AddStylesScanner_openTagBegin;
AddStylesScanner.prototype.text = AddStylesScanner_text;
AddStylesScanner.prototype.directive = AddStylesScanner_directive;

// Scans `source` and returns the HTML with the styles added.
//
// source  - the HTML handed to dw.scanSourceString
// context - object supplying createStringBuffer(); the returned buffer receives the output
//
// Throws the string "AddStylesScanner bad scan" when a callback flagged an error.
function AddStylesScanner_scan( source, context )
{
    this._sb = context.createStringBuffer();
    this._fixup = false;       // true while inside the opening tag of a matching element
    this._foundStyle = false;  // true once that opening tag's own style attribute was merged
    this._scan_error = false;

    dw.scanSourceString( source, this );

    if ( !this._scan_error )
        return this._sb.get();

    throw( "AddStylesScanner bad scan" );
}

// Directive callback: copies the directive to the output unchanged.
// Returns true on success; on any exception sets _scan_error and returns false.
function AddStylesScanner_directive( code, offset )
{
    var ok = true;

    try {
        this._sb.append( code );
    } catch( err ) {
        this._scan_error = true;
        ok = false;
    }

    return ok;
}

// Text callback: writes the text followed by a single space.
// Returns true on success; on any exception sets _scan_error and returns false.
function AddStylesScanner_text( code, offset )
{
    var ok = true;

    try {
        var padded = code + " ";
        this._sb.append( padded );
    } catch( err ) {
        this._scan_error = true;
        ok = false;
    }

    return ok;
}

// Opening-tag start callback: writes "<tag" and, when the tag is the one being decorated,
// raises _fixup so the attribute/openTagEnd callbacks add the styles.
// Returns true on success; on any exception sets _scan_error and returns false.
function AddStylesScanner_openTagBegin( tag, offset )
{
    var ok = true;

    try {
        var isTarget = ( this._tagName == tag.toLowerCase() );

        if ( isTarget )
            this._fixup = true;

        // No style attribute has been seen yet for this tag.
        this._foundStyle = false;

        this._sb.append( "<" + tag );
    } catch( err ) {
        this._scan_error = true;
        ok = false;
    }

    return ok;
}

// Opening-tag end callback: when a matching tag carried no style attribute of its own, a
// style attribute built from _styles is written first; then ">" is written and _fixup cleared.
// Returns true on success; on any exception sets _scan_error and returns false.
function AddStylesScanner_openTagEnd( tag, trailingFormat )
{
    var ok = true;

    try {
        var needsStyle = ( this._fixup == true && this._foundStyle == false );

        if ( needsStyle )
        {
            var built = Utils_BuildStyle( this._styles );
            this._sb.append( " style=\"" + built + "\"" );
        }

        this._sb.append( ">" );
        this._fixup = false;
    } catch( err ) {
        this._scan_error = true;
        ok = false;
    }

    return ok;
}

// Closing-tag callback: writes "</tag>".
// Returns true on success; on any exception sets _scan_error and returns false.
function AddStylesScanner_closeTagBegin( tag, offset )
{
    var ok = true;

    try {
        var closer = "</" + tag + ">";
        this._sb.append( closer );
    } catch( err ) {
        this._scan_error = true;
        ok = false;
    }

    return ok;
}

// Attribute callback. A style attribute on a matching tag is parsed, overlaid with _styles
// (the scanner's values win) and rebuilt. Any attribute whose resulting value is empty is
// not written at all.
// Returns true on success; on any exception sets _scan_error and returns false.
function AddStylesScanner_attribute( name, code )
{
    var ok = true;

    try {
        var value = code;

        if ( this._fixup && name.toLowerCase() == "style" )
        {
            var merged = Utils_ParseStyle( value );

            for( var prop in this._styles )
                merged[ prop ] = this._styles[ prop ];

            value = Utils_BuildStyle( merged );
            this._foundStyle = true;
        }

        if ( value.length > 0 )
        {
            var head = " " + name + "=\"";
            this._sb.append( head + handleAttributeText( value ) + "\"" );
        }
    } catch( err ) {
        this._scan_error = true;
        ok = false;
    }

    return ok;
}
-
-
-
-
- //---------------------------------------------------------------------------------------------------------
- // StructureScanner
- //---------------------------------------------------------------------------------------------------------
-
- // The StructureScanner is the most complex of all of the scanners. Given proper HTML it will create an
- // internal representation of the HTML using associative arrays and arrays, and then reconstruct the HTML
- // while calling member functions (which can be overridden) at key points.
- //
- // Given this HTML:
- //
- // <html><body><p>This is a test</p></body></html>
- //
- // The internal representation would be:
- //
- // { type: 'root',
- //   children: [
- //     { type: 'tag',
- //       tag: 'html',
- //       attributes: {},
- //       children: [
- //         { type: 'tag',
- //           tag: 'body',
- //           attributes: {},
- //           children: [
- //             { type: 'tag',
- //               tag: 'p',
- //               attributes: {},
- //               children: [
- //                 { type: 'text',
- //                   text: 'This is a test'
- //                 }
- //               ]
- //             }
- //           ]
- //         }
- //       ]
- //     }
- //   ]
- // }
-
- // The override methods described below are called after the structure has been created, during
- // the phase where the new HTML text is created.
-
- // -- StructureScanner.prototype.inspectTag( tag )
- //
- // tag - The root of this tag structure
- //
- // This is called the first time the tag is seen during the creation phase. Here you can alter
- // the tag before it is sent to the output. You can change the tag name, remove or add attributes,
- // and alter the children from here on down.
-
- // -- StructureScanner.prototype.startTag( tag )
- //
- // tag - The root of this tag structure
- //
- // For every node other than text, startTag is called before the child nodes are turned into HTML.
-
- // -- StructureScanner.prototype.createTag( tag, attributes, closed )
- //
- // tag - The tag name
- // attributes - The associative array of attributes
- // closed - true if the tag was both opened and closed officially (e.g. <p> and </p>)
- //
- // This is called to create the HTML for the tag. This method does not need to handle the child
- // nodes, those are handled by the structure parser (if you want to alter those see inspectTag.)
- // The output from this should either be null (which means that StructureScanner should handle
- // the tag) or an associative array with postfix and prefix attributes. The postfix is how the
- // tag should end and prefix is how the tag should start.
-
- // -- StructureScanner.prototype.endTag( tag )
- //
- // tag - The root of this tag structure
- //
- // The opposite number of start tag.
-
- // -- StructureScanner.prototype.finalizeTag( tag, attributes, closed, childHTML )
- //
- // tag - The tag name
- // attributes - The associative array of attributes
- // closed - true if the tag was both opened and closed officially (e.g. <p> and </p>)
- // childHTML - The finalized HTML of all of the children
- //
- // This is called as a final approval of the tag. If false is returned then the tag (and all of
- // its children) are not added into the HTML stream.
-
// Constructor. The scanner takes no configuration; all working state is created by scan().
function StructureScanner( ) { }

// External methods

StructureScanner.prototype.scan = StructureScanner_scan;

// scanSourceString methods (no openTagEnd handler is registered)

StructureScanner.prototype.directive = StructureScanner_directive;
StructureScanner.prototype.text = StructureScanner_text;
StructureScanner.prototype.openTagBegin = StructureScanner_openTagBegin;
StructureScanner.prototype.closeTagBegin = StructureScanner_closeTagBegin;
StructureScanner.prototype.attribute = StructureScanner_attribute;

// Internal methods to build the structure

StructureScanner.prototype.addTextChild = StructureScanner_addTextChild;
StructureScanner.prototype.addTagChild = StructureScanner_addTagChild;
StructureScanner.prototype.addAttribute = StructureScanner_addAttribute;
StructureScanner.prototype.finishTag = StructureScanner_finishTag;
StructureScanner.prototype.buildHTML = StructureScanner_buildHTML;

// Methods to override

StructureScanner.prototype.inspectTag = StructureScanner_inspectTag;
StructureScanner.prototype.startTag = StructureScanner_startTag;
StructureScanner.prototype.createTag = StructureScanner_createTag;
StructureScanner.prototype.finalizeTag = StructureScanner_finalizeTag;
StructureScanner.prototype.endTag = StructureScanner_endTag;

// Appends a text node holding `text` to the children of the current tag.
function StructureScanner_addTextChild( text )
{
    this._curTag.children.push( { type: "text", text: text } );
}

// Creates a tag node (name lower-cased, not yet closed), appends it to the current tag's
// children and makes it the current tag by pushing it onto the open-tag stack.
function StructureScanner_addTagChild( tag )
{
    tag = tag.toLowerCase();

    var node = { type: "tag", tag: tag, attributes: {}, children: [], closed: false };
    this._curTag.children.push( node );
    this._curTag = node;
    this._opStack.push( node );
}

// Stores an attribute on the current tag, keyed by the lower-cased attribute name.
function StructureScanner_addAttribute( name, value )
{
    name = name.toLowerCase();

    this._curTag.attributes[ name ] = value;
}

// Handles a closing tag.
//
// tag - name of the tag being closed; compared lower-cased
//
// The nearest open tag with that name is marked closed. It and every tag opened after it
// are removed from the open-tag stack, and the tag beneath it becomes the current tag again.
//
// A closing tag that matches nothing on the stack is ignored. Popping until a match is
// found would otherwise empty the stack, root included, and leave _curTag undefined, so
// that every later callback fails and the whole scan is reported as bad.
function StructureScanner_finishTag( tag )
{
    tag = tag.toLowerCase();

    // Search from the innermost open tag outwards. The root node has no tag property and
    // therefore never matches.
    var match = this._opStack.length - 1;
    while( match >= 0 && this._opStack[ match ].tag != tag )
        match--;

    if ( match < 0 )
        return;

    this._opStack[ match ].closed = true;

    // Drop the matched tag and everything opened after it.
    this._opStack.length = match;

    this._curTag = this._opStack[ this._opStack.length - 1 ];
}

// Turns a node of the internal structure back into HTML, recursing into its children.
//
// tag - a root, tag or text node
//
// Hooks are called in this order: inspectTag, then (for every node other than text)
// startTag, createTag (tag nodes only), the children, endTag and finalizeTag. When createTag
// returns null the default StructureScanner_createTag supplies the prefix and postfix.
//
// Returns the HTML for the node, or "" when finalizeTag returned false - in that case
// neither the tag nor any of its children are emitted.
function StructureScanner_buildHTML( tag )
{
    this.inspectTag( tag );

    // The concatenation keeps the result a string whatever the node holds.
    if ( tag.type == "text" )
        return tag.text + "";

    var prefix = "";
    var postfix = "";
    var childHTML = "";

    this.startTag( tag );

    if ( tag.type == "tag" )
    {
        var retVal = this.createTag( tag.tag, tag.attributes, tag.closed );

        if ( retVal == null )
            retVal = StructureScanner_createTag( tag.tag, tag.attributes, tag.closed );

        prefix = retVal.prefix;
        postfix = retVal.postfix;
    }

    // Indexed loop rather than for...in, so only real array elements are visited. Missing
    // entries (for example a child removed with delete by inspectTag) are skipped, as
    // for...in would have skipped them.
    for( var index = 0; index < tag.children.length; index++ )
    {
        if ( tag.children[ index ] != null )
            childHTML += this.buildHTML( tag.children[ index ] );
    }

    this.endTag( tag );

    if ( this.finalizeTag( tag.tag, tag.attributes, tag.closed, childHTML ) == false )
        return "";

    return prefix + childHTML + postfix;
}

// Default finalizeTag hook: approves every tag.
function StructureScanner_finalizeTag( tag, attributes, closed, childHTML ) { return true; }

// Default startTag hook: does nothing.
function StructureScanner_startTag( tag ) { }

// Default endTag hook: does nothing.
function StructureScanner_endTag( tag ) { }

// Default inspectTag hook: leaves the node untouched. buildHTML ignores the return value.
function StructureScanner_inspectTag( tag ) { return tag; }

// Default createTag implementation.
//
// tag        - tag name
// attributes - associative array of attribute name to value; null/undefined values are skipped
// closed     - whether a matching closing tag was seen
//
// Returns { prefix, postfix }: prefix is the opening tag with its attributes (values passed
// through handleAttributeText); postfix is "</tag>" when closed, otherwise "".
function StructureScanner_createTag( tag, attributes, closed )
{
    var prefix = "<" + tag;
    var postfix = "";

    for( var key in attributes )
    {
        if ( attributes[ key ] != null )
            prefix += " " + key + "=\"" + handleAttributeText(attributes[ key ]) + "\"";
    }
    prefix += ">";

    if ( closed )
        postfix = "</" + tag + ">";

    return { prefix: prefix, postfix: postfix };
}


// Parses `source` into the internal structure and returns the HTML rebuilt from it.
//
// source - the HTML handed to dw.scanSourceString
//
// Throws the string "StructureScanner bad scan" when a callback flagged an error.
function StructureScanner_scan( source )
{
    var rootTag = { type: "root", children: [] };
    this._curTag = rootTag;
    this._opStack = [ this._curTag ];
    this._scan_error = false;

    dw.scanSourceString( source, this );

    if ( this._scan_error )
        throw( "StructureScanner bad scan" );

    return this.buildHTML( rootTag );
}

// Directive callback: stored as a text node so it is reproduced unchanged.
// Returns true on success; on any exception sets _scan_error and returns false.
function StructureScanner_directive( code, offset )
{
    try {

        this.addTextChild( code );

    } catch( e ) {

        this._scan_error = true;
        return false;

    }

    return true;
}

// Text callback: stores the text as a child of the current tag.
// Returns true on success; on any exception sets _scan_error and returns false.
function StructureScanner_text( code, offset )
{
    try {

        this.addTextChild( code );

    } catch( e ) {

        this._scan_error = true;
        return false;

    }

    return true;
}

// Opening-tag callback: starts a new tag node beneath the current tag.
// Returns true on success; on any exception sets _scan_error and returns false.
function StructureScanner_openTagBegin( tag, offset )
{
    try {

        this.addTagChild( tag );

    } catch( e ) {

        this._scan_error = true;
        return false;

    }

    return true;
}

// Closing-tag callback: closes the matching open tag (see finishTag).
// Returns true on success; on any exception sets _scan_error and returns false.
function StructureScanner_closeTagBegin( tag, offset )
{
    try {

        this.finishTag( tag );

    } catch( e ) {

        this._scan_error = true;
        return false;

    }

    return true;
}

// Attribute callback: records the attribute on the current tag.
// Returns true on success; on any exception sets _scan_error and returns false.
function StructureScanner_attribute( name, code )
{
    try {

        this.addAttribute( name, code );

    } catch( e ) {

        this._scan_error = true;
        return false;

    }

    return true;
}
-
-
-
-
- //---------------------------------------------------------------------------------------------------------
- // RemoveHiddenSpansScanner
- //---------------------------------------------------------------------------------------------------------
-
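- // This scanner is specific to MS products. It removes hidden content: SPAN tags whose style
- // contains display:none, P tags with class MsoCommentText and SPAN tags with class
- // MsoCommentReference, together with everything nested inside them.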
-
// Constructor. The scanner takes no configuration; all working state is created by scan().
function RemoveHiddenSpansScanner( )
{
}

// External methods

RemoveHiddenSpansScanner.prototype.scan = RemoveHiddenSpansScanner_scan;

// scanSourceString specific methods

RemoveHiddenSpansScanner.prototype.directive = RemoveHiddenSpansScanner_directive;
RemoveHiddenSpansScanner.prototype.text = RemoveHiddenSpansScanner_text;
RemoveHiddenSpansScanner.prototype.openTagBegin = RemoveHiddenSpansScanner_openTagBegin;
RemoveHiddenSpansScanner.prototype.openTagEnd = RemoveHiddenSpansScanner_openTagEnd;
RemoveHiddenSpansScanner.prototype.closeTagBegin = RemoveHiddenSpansScanner_closeTagBegin;
RemoveHiddenSpansScanner.prototype.attribute = RemoveHiddenSpansScanner_attribute;

// Scans `source` and returns the HTML without the hidden elements and their contents.
//
// source  - the HTML handed to dw.scanSourceString
// context - object supplying createStringBuffer(); the returned buffer receives the output
//
// Throws the string "RemoveHiddenSpansScanner bad scan" when a callback flagged an error.
function RemoveHiddenSpansScanner_scan( source, context )
{
    this._sb = context.createStringBuffer();

    // Attributes of the opening tag currently being read. A plain object, not an Array:
    // on an Array an attribute called "length" would resize it or throw a RangeError.
    this._attributes = {};

    // 0 while output is enabled; otherwise the nesting depth inside a hidden element.
    this._hiddenSpanDepth = 0;

    this._tag = "";
    this._scan_error = false;

    dw.scanSourceString( source, this );

    if ( this._scan_error )
        throw( "RemoveHiddenSpansScanner bad scan" );

    return this._sb.get();
}

// Directive callback: outside hidden content, writes the directive followed by a space.
// Returns true on success; on any exception sets _scan_error and returns false.
function RemoveHiddenSpansScanner_directive( code, offset )
{
    try {

        if ( this._hiddenSpanDepth == 0 )
            this._sb.append( code + " " );

    } catch( e ) {

        this._scan_error = true;
        return false;

    }

    return true;
}

// Text callback: outside hidden content, writes the text followed by a space.
// Returns true on success; on any exception sets _scan_error and returns false.
function RemoveHiddenSpansScanner_text( code, offset )
{
    try {

        if ( this._hiddenSpanDepth == 0 )
            this._sb.append( code + " " );

    } catch( e ) {

        this._scan_error = true;
        return false;

    }

    return true;
}

// Opening-tag start callback: remembers the tag name and starts a fresh attribute table.
// Nothing is written yet because the decision to hide the tag needs its attributes.
// Returns true on success; on any exception sets _scan_error and returns false.
function RemoveHiddenSpansScanner_openTagBegin( tag, offset )
{
    try {

        this._tag = tag;
        this._attributes = {};

        // NOTE(review): the tag and attribute names are stored as received and compared
        // case-sensitively in openTagEnd - confirm scanSourceString reports them lower-cased.

    } catch( e ) {

        this._scan_error = true;
        return false;

    }

    return true;
}

// Opening-tag end callback.
//
// Inside hidden content the nesting depth is increased. Otherwise the buffered tag is either
// recognised as hidden (which starts hiding at depth 1) or written out with its attributes.
// A tag counts as hidden when it is a span whose style contains "display:none", a p whose
// class contains "MsoCommentText", or a span whose class contains "MsoCommentReference".
//
// Returns true on success; on any exception sets _scan_error and returns false.
function RemoveHiddenSpansScanner_openTagEnd( tag, trailingFormat )
{
    try {

        if ( this._hiddenSpanDepth != 0 )
        {
            this._hiddenSpanDepth++;
            return true;
        }

        var attrs = this._attributes;
        var style = attrs[ "style" ];
        var cls = attrs[ "class" ];

        var hidden = false;

        if ( this._tag == "span" && style && style.match( /display:none/ ) )
            hidden = true;

        if ( this._tag == "p" && cls && cls.match( /MsoCommentText/ ) )
            hidden = true;

        if ( this._tag == "span" && cls && cls.match( /MsoCommentReference/ ) )
            hidden = true;

        if ( hidden )
        {
            this._hiddenSpanDepth = 1;
            return true;
        }

        this._sb.append( "<" + this._tag );

        // `key` is declared locally; it must not leak into the global scope.
        for( var key in attrs )
        {
            if ( Object.prototype.hasOwnProperty.call( attrs, key ) )
                this._sb.append( " " + key + "=\"" + handleAttributeText(attrs[ key ]) + "\"" );
        }

        this._sb.append( ">" );

    } catch( e ) {

        this._scan_error = true;
        return false;

    }

    return true;
}

// Attribute callback: outside hidden content, buffers the attribute for openTagEnd.
// Returns true on success; on any exception sets _scan_error and returns false.
function RemoveHiddenSpansScanner_attribute( name, code )
{
    try {

        if ( this._hiddenSpanDepth == 0 )
            this._attributes[ name ] = code;

    } catch( e ) {

        this._scan_error = true;
        return false;

    }

    return true;
}

// Closing-tag callback: outside hidden content writes "</tag>"; inside it, steps one level
// back out (the closing tag that brings the depth to 0 is itself not written).
// Returns true on success; on any exception sets _scan_error and returns false.
function RemoveHiddenSpansScanner_closeTagBegin( tag, offset )
{
    try {

        if ( this._hiddenSpanDepth == 0 )
        {
            this._sb.append( "</" + tag + ">" );
        }
        else
        {
            this._hiddenSpanDepth--;
        }

    } catch( e ) {

        this._scan_error = true;
        return false;

    }

    return true;
}
-
-
-
-
-
- //---------------------------------------------------------------------------------------------------------
- // DebugScanner
- //---------------------------------------------------------------------------------------------------------
-
// Constructor. The scanner takes no configuration.
function DebugScanner( )
{
}

// Public interface

DebugScanner.prototype.scan = DebugScanner_scan;

// Callbacks handed to dw.scanSourceString

DebugScanner.prototype.attribute = DebugScanner_attribute;
DebugScanner.prototype.closeTagBegin = DebugScanner_closeTagBegin;
DebugScanner.prototype.openTagEnd = DebugScanner_openTagEnd;
DebugScanner.prototype.openTagBegin = DebugScanner_openTagBegin;
DebugScanner.prototype.text = DebugScanner_text;
DebugScanner.prototype.directive = DebugScanner_directive;

// Scans `source`, raising an alert for every callback scanSourceString makes. Returns nothing.
//
// Throws the string "DebugScanner bad scan" when a callback flagged an error.
function DebugScanner_scan( source )
{
    this._exceptionTag = false;
    this._scan_error = false;

    dw.scanSourceString( source, this );

    if ( !this._scan_error )
        return;

    throw( "DebugScanner bad scan" );
}

// Directive callback: alerts the directive code and its offset.
// Returns true on success; on any exception sets _scan_error and returns false.
function DebugScanner_directive( code, offset )
{
    var ok = true;

    try {
        var message = "directive( " + code + ", " + offset + " )";
        alert( message );
    } catch( err ) {
        this._scan_error = true;
        ok = false;
    }

    return ok;
}

// Text callback: alerts the text and its offset.
// Returns true on success; on any exception sets _scan_error and returns false.
function DebugScanner_text( code, offset )
{
    var ok = true;

    try {
        var message = "text( " + code + ", " + offset + " )";
        alert( message );
    } catch( err ) {
        this._scan_error = true;
        ok = false;
    }

    return ok;
}

// Opening-tag start callback: alerts the tag name and its offset.
// Returns true on success; on any exception sets _scan_error and returns false.
function DebugScanner_openTagBegin( tag, offset )
{
    var ok = true;

    try {
        var message = "openTagBegin( " + tag + ", " + offset + " )";
        alert( message );
    } catch( err ) {
        this._scan_error = true;
        ok = false;
    }

    return ok;
}

// Opening-tag end callback: alerts the tag name and the trailing format.
// Returns true on success; on any exception sets _scan_error and returns false.
function DebugScanner_openTagEnd( tag, trailingFormat )
{
    var ok = true;

    try {
        var message = "openTagEnd( " + tag + ", " + trailingFormat + " )";
        alert( message );
    } catch( err ) {
        this._scan_error = true;
        ok = false;
    }

    return ok;
}

// Closing-tag callback: alerts the tag name and its offset.
// Returns true on success; on any exception sets _scan_error and returns false.
function DebugScanner_closeTagBegin( tag, offset )
{
    var ok = true;

    try {
        var message = "closeTagBegin( " + tag + ", " + offset + " )";
        alert( message );
    } catch( err ) {
        this._scan_error = true;
        ok = false;
    }

    return ok;
}

// Attribute callback: alerts the attribute name and its value (after handleAttributeText).
// Returns true on success; on any exception sets _scan_error and returns false.
function DebugScanner_attribute( name, code )
{
    var ok = true;

    try {
        var message = "attribute( " + name + ", " + handleAttributeText( code ) + " )";
        alert( message );
    } catch( err ) {
        this._scan_error = true;
        ok = false;
    }

    return ok;
}
-
-