home *** CD-ROM | disk | FTP | other *** search
- package HTML::Parse;
-
- # $Id: Parse.pm,v 2.5 1998/03/26 20:32:21 aas Exp $
-
- =head1 NAME
-
- HTML::Parse - Depreciated
-
- =head1 SYNOPSIS
-
- use HTML::Parse;
- $h = parse_htmlfile("test.html");
- print $h->dump;
- $h = parse_html("<p>Some more <i>italic</i> text", $h);
- $h->delete;
-
- print parse_htmlfile("index.html")->as_HTML; # tidy up markup in a file
-
- =head1 DESCRIPTION
-
- I<Disclaimer: This module is only provided for backwards compatibility
- with earlier versions of this library. New code shold use the
- HTML::Parser and HTML::TreeBuilder modules directly.>
-
- The C<HTML::Parse> module provides functions to parse HTML documents.
- There are two functions exported by this module:
-
- =over 4
-
- =item parse_html($html, [$obj])
-
- This function is really just a synonym for $obj->parse($html) and $obj
- is assumed to be a subclass of C<HTML::Parser>. Refer to
- L<HTML::Parser> for more documentation.
-
- The $obj will default to an internally created C<HTML::TreeBuilder>
- object configured with strict_comment() turned on. This class
- implements a parser that builds (and is) a HTML syntax tree with
- HTML::Element objects as nodes.
-
- The return value from parse_html() is $obj.
-
- =item parse_htmlfile($file, [$obj])
-
- Same as parse_html(), but obtains HTML text from the named file.
-
- Returns C<undef> if the file could not be opened, or $obj otherwise.
-
- =back
-
- When a C<HTML::TreeBuilder> object is created, the following variables
- control how parsing takes place:
-
- =over 4
-
- =item $HTML::Parse::IMPLICIT_TAGS
-
- Setting this variable to true will instruct the parser to try to
- deduce implicit elements and implicit end tags. If this variable is
- false you get a parse tree that just reflects the text as it stands.
- Might be useful for quick & dirty parsing. Default is true.
-
- Implicit elements have the implicit() attribute set.
-
- =item $HTML::Parse::IGNORE_UNKNOWN
-
- This variable contols whether unknow tags should be represented as
- elements in the parse tree. Default is true.
-
- =item $HTML::Parse::IGNORE_TEXT
-
- Do not represent the text content of elements. This saves space if
- all you want is to examine the structure of the document. Default is
- false.
-
- =item $HTML::Parse::WARN
-
- Call warn() with an apropriate message for syntax errors. Default is
- false.
-
- =back
-
- =head1 SEE ALSO
-
- L<HTML::Parser>, L<HTML::TreeBuilder>, L<HTML::Element>
-
- =head1 COPYRIGHT
-
- Copyright 1995-1998 Gisle Aas. All rights reserved.
-
- This library is free software; you can redistribute it and/or
- modify it under the same terms as Perl itself.
-
- =head1 AUTHOR
-
- Gisle Aas <aas@sn.no>
-
- =cut
-
-
- require Exporter;
- @ISA = qw(Exporter);
- @EXPORT = qw(parse_html parse_htmlfile);
-
- use strict;
- use vars qw($VERSION
- $IMPLICIT_TAGS $IGNORE_UNKNOWN $IGNORE_TEXT $WARN);
-
- # Backwards compatability
- $IMPLICIT_TAGS = 1;
- $IGNORE_UNKNOWN = 1;
- $IGNORE_TEXT = 0;
- $WARN = 0;
-
- require HTML::TreeBuilder;
-
- $VERSION = sprintf("%d.%02d", q$Revision: 2.5 $ =~ /(\d+)\.(\d+)/);
-
-
- sub parse_html ($;$)
- {
- my $p = $_[1];
- $p = _new_tree_maker() unless $p;
- $p->parse($_[0]);
- }
-
-
- sub parse_htmlfile ($;$)
- {
- my($file, $p) = @_;
- local(*HTML);
- open(HTML, $file) or return undef;
- $p = _new_tree_maker() unless $p;
- $p->parse_file(\*HTML);
- }
-
- sub _new_tree_maker
- {
- my $p = HTML::TreeBuilder->new(implicit_tags => $IMPLICIT_TAGS,
- ignore_unknown => $IGNORE_UNKNOWN,
- ignore_text => $IGNORE_TEXT,
- 'warn' => $WARN,
- );
- $p->strict_comment(1);
- $p;
- }
-
- 1;
-