home *** CD-ROM | disk | FTP | other *** search
- # tclparser-8.1.tcl --
- #
- # This file provides a Tcl implementation of a XML parser.
- # This file supports Tcl 8.1.
- #
- # See xml-8.[01].tcl for definitions of character sets and
- # regular expressions.
- #
- # Copyright (c) 1998-2001 Zveno Pty Ltd
- # http://www.zveno.com/
- #
- # Zveno makes this software and all associated data and documentation
- # ('Software') available free of charge for any purpose.
- # Copies may be made of this Software but all of this notice must be included
- # on any copy.
- #
- # The Software was developed for research purposes and Zveno does not warrant
- # that it is error free or fit for any purpose. Zveno disclaims any
- # liability for all claims, expenses, losses, damages and costs any user may
- # incur as a result of using, copying or modifying the Software.
- #
- # Copyright (c) 1997 Australian National University (ANU).
- #
- # ANU makes this software and all associated data and documentation
- # ('Software') available free of charge for any purpose. You may make copies
- # of the Software but you must include all of this notice on any copy.
- #
- # The Software was developed for research purposes and ANU does not warrant
- # that it is error free or fit for any purpose. ANU disclaims any
- # liability for all claims, expenses, losses, damages and costs any user may
- # incur as a result of using, copying or modifying the Software.
- #
- # $Id: tclparser-8.1.tcl,v 1.12 2001/02/26 02:11:11 doss Exp $
-
- package require Tcl 8.1
-
- package provide xml::tclparser 2.0
-
- package require xmldefs 2.0
-
- package require sgmlparser 1.0
-
- namespace eval xml::tclparser {
-
- namespace export create createexternal externalentity parse configure get delete
-
- # Tokenising expressions
-
- variable tokExpr $::xml::tokExpr
- variable substExpr $::xml::substExpr
-
- # Register this parser class
-
- ::xml::parserclass create tcl \
- -createcommand [namespace code create] \
- -createentityparsercommand [namespace code createentityparser] \
- -parsecommand [namespace code parse] \
- -configurecommand [namespace code configure] \
- -deletecommand [namespace code delete]
- }
-
- # xml::tclparser::create --
- #
- # Creates XML parser object.
- #
- # Arguments:
- # name unique identifier for this instance
- #
- # Results:
- # The state variable is initialised.
-
- proc xml::tclparser::create name {
-
- # Initialise state variable
- upvar \#0 [namespace current]::$name parser
- array set parser [list -name $name \
- -final 1 \
- -validate 0 \
- -statevariable [namespace current]::$name \
- -baseurl {} \
- internaldtd {} \
- entities [namespace current]::Entities$name \
- extentities [namespace current]::ExtEntities$name \
- parameterentities [namespace current]::PEntities$name \
- externalparameterentities [namespace current]::ExtPEntities$name \
- elementdecls [namespace current]::ElDecls$name \
- attlistdecls [namespace current]::AttlistDecls$name \
- notationdecls [namespace current]::NotDecls$name \
- depth 0 \
- ]
-
- # Initialise entities with predefined set
- array set [namespace current]::Entities$name [array get ::sgml::EntityPredef]
-
- return $name
- }
-
- # xml::tclparser::createentityparser --
- #
- # Creates XML parser object for an entity.
- #
- # Arguments:
- # name name for the new parser
- # parent name of parent parser
- #
- # Results:
- # The state variable is initialised.
-
- proc xml::tclparser::createentityparser {parent name} {
- upvar #0 [namespace current]::$parent p
-
- # Initialise state variable
- upvar \#0 [namespace current]::$name external
- array set external [array get p]
-
- array set external [list -name $name \
- -statevariable [namespace current]::$name \
- internaldtd {} \
- line 0 \
- ]
- incr external(depth)
-
- return $name
- }
-
- # xml::tclparser::configure --
- #
- # Configures a XML parser object.
- #
- # Arguments:
- # name unique identifier for this instance
- # args option name/value pairs
- #
- # Results:
- # May change values of config options
-
- proc xml::tclparser::configure {name args} {
- upvar \#0 [namespace current]::$name parser
-
- # BUG: very crude, no checks for illegal args
- array set parser $args
-
- return {}
- }
-
- # xml::tclparser::parse --
- #
- # Parses document instance data
- #
- # Arguments:
- # name parser object
- # xml data
- # args configuration options
- #
- # Results:
- # Callbacks are invoked
-
- proc xml::tclparser::parse {name xml args} {
-
- array set options $args
- upvar \#0 [namespace current]::$name parser
- variable tokExpr
- variable substExpr
-
- set parseOptions [list \
- -emptyelement [namespace code ParseEmpty] \
- -parseattributelistcommand [namespace code ParseAttrs] \
- -parseentitydeclcommand [namespace code ParseEntity] \
- -normalize 0]
- eval lappend parseOptions \
- [array get parser -*command] \
- [array get parser -reportempty] \
- [array get parser -name] \
- [array get parser -baseurl] \
- [array get parser -validate] \
- [array get parser -final] \
- [array get parser -defaultexpandinternalentities] \
- [array get parser entities] \
- [array get parser extentities] \
- [array get parser parameterentities] \
- [array get parser externalparameterentities] \
- [array get parser elementdecls] \
- [array get parser attlistdecls] \
- [array get parser notationdecls]
-
- set dtdsubset no
- catch {set dtdsubset $options(-dtdsubset)}
- switch -- $dtdsubset {
- internal {
- # Bypass normal parsing
- lappend parseOptions -statevariable $parser(-statevariable)
- array set intOptions [array get ::sgml::StdOptions]
- array set intOptions $parseOptions
- ::sgml::ParseDTD:Internal [array get intOptions] $xml
- return {}
- }
- external {
- # Bypass normal parsing
- lappend parseOptions -statevariable $parser(-statevariable)
- array set intOptions [array get ::sgml::StdOptions]
- array set intOptions $parseOptions
- ::sgml::ParseDTD:External [array get intOptions] $xml
- return {}
- }
- default {
- # Pass through to normal processing
- }
- }
-
- set tokenised [lrange \
- [::sgml::tokenise $xml \
- $tokExpr \
- $substExpr \
- -internaldtdvariable [namespace current]::${name}(internaldtd)] \
- 4 end]
-
- lappend parseOptions -internaldtd [list $parser(internaldtd)]
- eval ::sgml::parseEvent [list $tokenised] $parseOptions
-
- return {}
- }
-
- # xml::tclparser::ParseEmpty -- Tcl 8.1+ version
- #
- # Used by parser to determine whether an element is empty.
- # This is usually dead easy in XML, but as always not quite.
- # Have to watch out for empty element syntax
- #
- # Arguments:
- # tag element name
- # attr attribute list (raw)
- # e End tag delimiter.
- #
- # Results:
- # Return value of e
-
- proc xml::tclparser::ParseEmpty {tag attr e} {
- switch -glob [string length $e],[regexp "/[::xml::cl $::xml::Wsp]*$" $attr] {
- 0,0 {
- return {}
- }
- 0,* {
- return /
- }
- default {
- return $e
- }
- }
- }
-
- # xml::tclparser::ParseAttrs -- Tcl 8.1+ version
- #
- # Parse element attributes.
- #
- # There are two forms for name-value pairs:
- #
- # name="value"
- # name='value'
- #
- # Arguments:
- # attrs attribute string given in a tag
- #
- # Results:
- # Returns a Tcl list representing the name-value pairs in the
- # attribute string
- #
- # A ">" occurring in the attribute list causes problems when parsing
- # the XML. This manifests itself by an unterminated attribute value
- # and a ">" appearing the element text.
- # In this case return a three element list;
- # the message "unterminated attribute value", the attribute list it
- # did manage to parse and the remainder of the attribute list.
-
- proc xml::tclparser::ParseAttrs attrs {
-
- set result {}
-
- while {[string length [string trim $attrs]]} {
- if {[regexp ($::xml::Name)[::sgml::cl $::xml::Wsp]*=[::sgml::cl $::xml::Wsp]*("|')([::sgml::cl ^<]*?)\\2(.*) $attrs discard attrName delimiter value attrs]} {
- lappend result $attrName [NormalizeAttValue $value]
- } elseif {[regexp $::xml::Name[::sgml::cl $::xml::Wsp]*=[::sgml::cl $::xml::Wsp]*("|')[::sgml::cl ^<]*\$ $attrs]} {
- return -code error [list {unterminated attribute value} $result $attrs]
- } else {
- return -code error "invalid attribute list"
- }
- }
-
- return $result
- }
-
- # xml::tclparser::NormalizeAttValue --
- #
- # Perform attribute value normalisation. This involves:
- # . character references are appended to the value
- # . entity references are recursively processed and replacement value appended
- # . whitespace characters cause a space to be appended
- # . other characters appended as-is
- #
- # Because no state is passed in here, it's a bit difficult
- # to pass entity references back into the parser for further
- # replacement. I'll just punt on the whole thing for now and do
- # basic normalisation - char refs, pre-defined entities and ws.
- #
- # Arguments:
- # value unparsed attribute value
- #
- # Results:
- # Normalised value returned.
-
- proc xml::tclparser::NormalizeAttValue value {
-
- # sgmlparser already has backslashes protected
- # Protect Tcl specials
- regsub -all {([][$])} $value {\\\1} value
-
- # Deal with white space
- regsub -all "\[$::xml::Wsp\]" $value { } value
-
- # Find entity refs
- regsub -all {&([^;]+);} $value {[NormalizeAttValue:DeRef {\1}]} value
-
- return [subst $value]
- }
-
- # xml::tclparser::NormalizeAttValue:DeRef --
- #
- # Simplistic handler to normalize attribute values
- #
- # Arguments:
- # ref entity reference
- #
- # Results:
- # Returns character
-
- proc xml::tclparser::NormalizeAttValue:DeRef ref {
- switch -glob -- $ref {
- #x* {
- scan [string range 2 $ref] %x value
- return $value
- }
- #* {
- scan [string range 1 $ref] %d value
- return $value
- }
- lt -
- gt -
- amp -
- quot -
- apos {
- array set map {lt < gt > amp & quot \" apos '}
- return $map($ref)
- }
- default {
- return -code error "unable to resolve entity reference \"$ref\""
- }
- }
- }
-
- # xml::tclparser::ParseEntity --
- #
- # Parse general entity declaration
- #
- # Arguments:
- # data text to parse
- #
- # Results:
- # Tcl list containing entity declaration
-
- proc xml::tclparser::ParseEntity data {
- set data [string trim $data]
- if {[regexp $::sgml::ExternalEntityExpr $data discard type delimiter1 id1 discard delimiter2 id2 optNDATA ndata]} {
- switch $type {
- PUBLIC {
- return [list external $id2 $id1 $ndata]
- }
- SYSTEM {
- return [list external $id1 {} $ndata]
- }
- }
- } elseif {[regexp {^("|')(.*?)\1$} $data discard delimiter value]} {
- return [list internal $value]
- } else {
- return -code error "badly formed entity declaration"
- }
- }
-
- # xml::tclparser::delete --
- #
- # Destroy parser data
- #
- # Arguments:
- # name parser object
- #
- # Results:
- # Parser data structure destroyed
-
- proc xml::tclparser::delete name {
- upvar \#0 [namespace current]::$name parser
- catch {::sgml::ParserDelete $parser(-statevariable)}
- catch {unset parser}
- return {}
- }
-
- # xml::tclparser::get --
- #
- # Retrieve additional information from the parser
- #
- # Arguments:
- # name parser object
- # method info to retrieve
- # args additional arguments for method
- #
- # Results:
- # Depends on method
-
- proc xml::tclparser::get {name method args} {
- upvar #0 [namespace current]::$name parser
-
- switch -- $method {
-
- elementdecl {
- switch [llength $args] {
-
- 0 {
- # Return all element declarations
- upvar #0 $parser(elementdecls) elements
- return [array get elements]
- }
-
- 1 {
- # Return specific element declaration
- upvar #0 $parser(elementdecls) elements
- if {[info exists elements([lindex $args 0])]} {
- return [array get elements [lindex $args 0]]
- } else {
- return -code error "element \"[lindex $args 0]\" not declared"
- }
- }
-
- default {
- return -code error "wrong number of arguments: should be \"elementdecl ?element?\""
- }
- }
- }
-
- attlist {
- if {[llength $args] != 1} {
- return -code error "wrong number of arguments: should be \"get attlist element\""
- }
-
- upvar #0 $parser(attlistdecls)
-
- return {}
- }
-
- entitydecl {
- }
-
- parameterentitydecl {
- }
-
- notationdecl {
- }
-
- default {
- return -code error "unknown method \"$method\""
- }
- }
-
- return {}
- }
-
- # xml::tclparser::ExternalEntity --
- #
- # Resolve and parse external entity
- #
- # Arguments:
- # name parser object
- # base base URL
- # sys system identifier
- # pub public identifier
- #
- # Results:
- # External entity is fetched and parsed
-
- proc xml::tclparser::ExternalEntity {name base sys pub} {
- }
-