home *** CD-ROM | disk | FTP | other *** search
- package Pod::Text;
-
- =head1 NAME
-
- Pod::Text - convert POD data to formatted ASCII text
-
- =head1 SYNOPSIS
-
- use Pod::Text;
-
- pod2text("perlfunc.pod");
-
- Also:
-
- pod2text [B<-a>] [B<->I<width>] < input.pod
-
- =head1 DESCRIPTION
-
- Pod::Text is a module that can convert documentation in the POD format (such
- as can be found throughout the Perl distribution) into formatted ASCII.
- Termcap is optionally supported for boldface/underline, and can enabled via
- C<$Pod::Text::termcap=1>. If termcap has not been enabled, then backspaces
- will be used to simulate bold and underlined text.
-
- A separate F<pod2text> program is included that is primarily a wrapper for
- Pod::Text.
-
- The single function C<pod2text()> can take the optional options B<-a>
- for an alternative output format, then a B<->I<width> option with the
- max terminal width, followed by one or two arguments. The first
- should be the name of a file to read the pod from, or "E<lt>&STDIN" to read from
- STDIN. A second argument, if provided, should be a filehandle glob where
- output should be sent.
-
- =head1 AUTHOR
-
- Tom Christiansen E<lt>F<tchrist@mox.perl.com>E<gt>
-
- =head1 TODO
-
- Cleanup work. The input and output locations need to be more flexible,
- termcap shouldn't be a global variable, and the terminal speed needs to
- be properly calculated.
-
- =cut
-
- use Term::Cap;
- require Exporter;
- @ISA = Exporter;
- @EXPORT = qw(pod2text);
-
- use vars qw($VERSION);
- $VERSION = "1.0203";
-
- $termcap=0;
-
- $opt_alt_format = 0;
-
- #$use_format=1;
-
- $UNDL = "\x1b[4m";
- $INV = "\x1b[7m";
- $BOLD = "\x1b[1m";
- $NORM = "\x1b[0m";
-
- sub pod2text {
- shift if $opt_alt_format = ($_[0] eq '-a');
-
- if($termcap and !$setuptermcap) {
- $setuptermcap=1;
-
- my($term) = Tgetent Term::Cap { TERM => undef, OSPEED => 9600 };
- $UNDL = $term->{'_us'};
- $INV = $term->{'_mr'};
- $BOLD = $term->{'_md'};
- $NORM = $term->{'_me'};
- }
-
- $SCREEN = ($_[0] =~ /^-(\d+)/ && (shift, $1))
- || $ENV{COLUMNS}
- || ($ENV{TERMCAP} =~ /co#(\d+)/)[0]
- || ($^O ne 'MacOS' && (`stty -a 2>/dev/null` =~ /(\d+) columns/)[0])
- || 72;
-
- @_ = ("<&STDIN") unless @_;
- local($file,*OUTPUT) = @_;
- *OUTPUT = *STDOUT if @_<2;
-
- local $: = $:;
- $: = " \n" if $opt_alt_format; # Do not break ``-L/lib/'' into ``- L/lib/''.
-
- $/ = "";
-
- $FANCY = 0;
-
- $cutting = 1;
- $DEF_INDENT = 4;
- $indent = $DEF_INDENT;
- $needspace = 0;
- $begun = "";
-
- open(IN, $file) || die "Couldn't open $file: $!";
-
- POD_DIRECTIVE: while (<IN>) {
- if ($cutting) {
- next unless /^=/;
- $cutting = 0;
- }
- if ($begun) {
- if (/^=end\s+$begun/) {
- $begun = "";
- }
- elsif ($begun eq "text") {
- print OUTPUT $_;
- }
- next;
- }
- 1 while s{^(.*?)(\t+)(.*)$}{
- $1
- . (' ' x (length($2) * 8 - length($1) % 8))
- . $3
- }me;
- # Translate verbatim paragraph
- if (/^\s/) {
- output($_);
- next;
- }
-
- if (/^=for\s+(\S+)\s*(.*)/s) {
- if ($1 eq "text") {
- print OUTPUT $2,"";
- } else {
- # ignore unknown for
- }
- next;
- }
- elsif (/^=begin\s+(\S+)\s*(.*)/s) {
- $begun = $1;
- if ($1 eq "text") {
- print OUTPUT $2."";
- }
- next;
- }
-
- sub prepare_for_output {
-
- s/\s*$/\n/;
- &init_noremap;
-
- # need to hide E<> first; they're processed in clear_noremap
- s/(E<[^<>]+>)/noremap($1)/ge;
- $maxnest = 10;
- while ($maxnest-- && /[A-Z]</) {
- unless ($FANCY) {
- if ($opt_alt_format) {
- s/[BC]<(.*?)>/``$1''/sg;
- s/F<(.*?)>/"$1"/sg;
- } else {
- s/C<(.*?)>/`$1'/sg;
- }
- } else {
- s/C<(.*?)>/noremap("E<lchevron>${1}E<rchevron>")/sge;
- }
- # s/[IF]<(.*?)>/italic($1)/ge;
- s/I<(.*?)>/*$1*/sg;
- # s/[CB]<(.*?)>/bold($1)/ge;
- s/X<.*?>//sg;
- # LREF: a manpage(3f)
- s:L<([a-zA-Z][^\s\/]+)(\([^\)]+\))?>:the $1$2 manpage:g;
- # LREF: an =item on another manpage
- s{
- L<
- ([^/]+)
- /
- (
- [:\w]+
- (\(\))?
- )
- >
- } {the "$2" entry in the $1 manpage}gx;
-
- # LREF: an =item on this manpage
- s{
- ((?:
- L<
- /
- (
- [:\w]+
- (\(\))?
- )
- >
- (,?\s+(and\s+)?)?
- )+)
- } { internal_lrefs($1) }gex;
-
- # LREF: a =head2 (head1?), maybe on a manpage, maybe right here
- # the "func" can disambiguate
- s{
- L<
- (?:
- ([a-zA-Z]\S+?) /
- )?
- "?(.*?)"?
- >
- }{
- do {
- $1 # if no $1, assume it means on this page.
- ? "the section on \"$2\" in the $1 manpage"
- : "the section on \"$2\""
- }
- }sgex;
-
- s/[A-Z]<(.*?)>/$1/sg;
- }
- clear_noremap(1);
- }
-
- &prepare_for_output;
-
- if (s/^=//) {
- # $needspace = 0; # Assume this.
- # s/\n/ /g;
- ($Cmd, $_) = split(' ', $_, 2);
- # clear_noremap(1);
- if ($Cmd eq 'cut') {
- $cutting = 1;
- }
- elsif ($Cmd eq 'pod') {
- $cutting = 0;
- }
- elsif ($Cmd eq 'head1') {
- makespace();
- if ($opt_alt_format) {
- print OUTPUT "\n";
- s/^(.+?)[ \t]*$/==== $1 ====/;
- }
- print OUTPUT;
- # print OUTPUT uc($_);
- $needspace = $opt_alt_format;
- }
- elsif ($Cmd eq 'head2') {
- makespace();
- # s/(\w+)/\u\L$1/g;
- #print ' ' x $DEF_INDENT, $_;
- # print "\xA7";
- s/(\w)/\xA7 $1/ if $FANCY;
- if ($opt_alt_format) {
- s/^(.+?)[ \t]*$/== $1 ==/;
- print OUTPUT "\n", $_;
- } else {
- print OUTPUT ' ' x ($DEF_INDENT/2), $_, "\n";
- }
- $needspace = $opt_alt_format;
- }
- elsif ($Cmd eq 'over') {
- push(@indent,$indent);
- $indent += ($_ + 0) || $DEF_INDENT;
- }
- elsif ($Cmd eq 'back') {
- $indent = pop(@indent);
- warn "Unmatched =back\n" unless defined $indent;
- }
- elsif ($Cmd eq 'item') {
- makespace();
- # s/\A(\s*)\*/$1\xb7/ if $FANCY;
- # s/^(\s*\*\s+)/$1 /;
- {
- if (length() + 3 < $indent) {
- my $paratag = $_;
- $_ = <IN>;
- if (/^=/) { # tricked!
- local($indent) = $indent[$#index - 1] || $DEF_INDENT;
- output($paratag);
- redo POD_DIRECTIVE;
- }
- &prepare_for_output;
- IP_output($paratag, $_);
- } else {
- local($indent) = $indent[$#index - 1] || $DEF_INDENT;
- output($_, 0);
- }
- }
- }
- else {
- warn "Unrecognized directive: $Cmd\n";
- }
- }
- else {
- # clear_noremap(1);
- makespace();
- output($_, 1);
- }
- }
-
- close(IN);
-
- }
-
- #########################################################################
-
- sub makespace {
- if ($needspace) {
- print OUTPUT "\n";
- $needspace = 0;
- }
- }
-
- sub bold {
- my $line = shift;
- return $line if $use_format;
- if($termcap) {
- $line = "$BOLD$line$NORM";
- } else {
- $line =~ s/(.)/$1\b$1/g;
- }
- # $line = "$BOLD$line$NORM" if $ansify;
- return $line;
- }
-
- sub italic {
- my $line = shift;
- return $line if $use_format;
- if($termcap) {
- $line = "$UNDL$line$NORM";
- } else {
- $line =~ s/(.)/$1\b_/g;
- }
- # $line = "$UNDL$line$NORM" if $ansify;
- return $line;
- }
-
- # Fill a paragraph including underlined and overstricken chars.
- # It's not perfect for words longer than the margin, and it's probably
- # slow, but it works.
- sub fill {
- local $_ = shift;
- my $par = "";
- my $indent_space = " " x $indent;
- my $marg = $SCREEN-$indent;
- my $line = $indent_space;
- my $line_length;
- foreach (split) {
- my $word_length = length;
- $word_length -= 2 while /\010/g; # Subtract backspaces
-
- if ($line_length + $word_length > $marg) {
- $par .= $line . "\n";
- $line= $indent_space . $_;
- $line_length = $word_length;
- }
- else {
- if ($line_length) {
- $line_length++;
- $line .= " ";
- }
- $line_length += $word_length;
- $line .= $_;
- }
- }
- $par .= "$line\n" if $line;
- $par .= "\n";
- return $par;
- }
-
- sub IP_output {
- local($tag, $_) = @_;
- local($tag_indent) = $indent[$#index - 1] || $DEF_INDENT;
- $tag_cols = $SCREEN - $tag_indent;
- $cols = $SCREEN - $indent;
- $tag =~ s/\s*$//;
- s/\s+/ /g;
- s/^ //;
- $str = "format OUTPUT = \n"
- . (($opt_alt_format && $tag_indent > 1)
- ? ":" . " " x ($tag_indent - 1)
- : " " x ($tag_indent))
- . '@' . ('<' x ($indent - $tag_indent - 1))
- . "^" . ("<" x ($cols - 1)) . "\n"
- . '$tag, $_'
- . "\n~~"
- . (" " x ($indent-2))
- . "^" . ("<" x ($cols - 5)) . "\n"
- . '$_' . "\n\n.\n1";
- #warn $str; warn "tag is $tag, _ is $_";
- eval $str || die;
- write OUTPUT;
- }
-
- sub output {
- local($_, $reformat) = @_;
- if ($reformat) {
- $cols = $SCREEN - $indent;
- s/\s+/ /g;
- s/^ //;
- $str = "format OUTPUT = \n~~"
- . (" " x ($indent-2))
- . "^" . ("<" x ($cols - 5)) . "\n"
- . '$_' . "\n\n.\n1";
- eval $str || die;
- write OUTPUT;
- } else {
- s/^/' ' x $indent/gem;
- s/^\s+\n$/\n/gm;
- s/^ /: /s if defined($reformat) && $opt_alt_format;
- print OUTPUT;
- }
- }
-
- sub noremap {
- local($thing_to_hide) = shift;
- $thing_to_hide =~ tr/\000-\177/\200-\377/;
- return $thing_to_hide;
- }
-
- sub init_noremap {
- die "unmatched init" if $mapready++;
- #mask off high bit characters in input stream
- s/([\200-\377])/"E<".ord($1).">"/ge;
- }
-
- sub clear_noremap {
- my $ready_to_print = $_[0];
- die "unmatched clear" unless $mapready--;
- tr/\200-\377/\000-\177/;
- # now for the E<>s, which have been hidden until now
- # otherwise the interative \w<> processing would have
- # been hosed by the E<gt>
- s {
- E<
- (
- ( \d+ )
- | ( [A-Za-z]+ )
- )
- >
- } {
- do {
- defined $2
- ? chr($2)
- :
- defined $HTML_Escapes{$3}
- ? do { $HTML_Escapes{$3} }
- : do {
- warn "Unknown escape: E<$1> in $_";
- "E<$1>";
- }
- }
- }egx if $ready_to_print;
- }
-
- sub internal_lrefs {
- local($_) = shift;
- s{L</([^>]+)>}{$1}g;
- my(@items) = split( /(?:,?\s+(?:and\s+)?)/ );
- my $retstr = "the ";
- my $i;
- for ($i = 0; $i <= $#items; $i++) {
- $retstr .= "C<$items[$i]>";
- $retstr .= ", " if @items > 2 && $i != $#items;
- $retstr .= " and " if $i+2 == @items;
- }
-
- $retstr .= " entr" . ( @items > 1 ? "ies" : "y" )
- . " elsewhere in this document ";
-
- return $retstr;
-
- }
-
- BEGIN {
-
- if ($^O eq 'MacOS') {
- %HTML_Escapes = (
- 'amp' => '&', # ampersand
- 'lt' => '<', # left chevron, less-than
- 'gt' => '>', # right chevron, greater-than
- 'quot' => '"', # double quote
-
- "Aacute" => "´A", # capital A, acute accent
- "aacute" => "á", # small a, acute accent
- "Acirc" => "Â", # capital A, circumflex accent
- "acirc" => "â", # small a, circumflex accent
- "AElig" => "Æ", # capital AE diphthong (ligature)
- "aelig" => "æ", # small ae diphthong (ligature)
- "Agrave" => "À", # capital A, grave accent
- "agrave" => "à", # small a, grave accent
- "Aring" => "Å", # capital A, ring
- "aring" => "å", # small a, ring
- "Atilde" => "Ã", # capital A, tilde
- "atilde" => "ã", # small a, tilde
- "Auml" => "Ä", # capital A, dieresis or umlaut mark
- "auml" => "ä", # small a, dieresis or umlaut mark
- "Ccedil" => "Ç", # capital C, cedilla
- "ccedil" => "ç", # small c, cedilla
- "Eacute" => "É", # capital E, acute accent
- "eacute" => "é", # small e, acute accent
- "Ecirc" => "Ê", # capital E, circumflex accent
- "ecirc" => "ê", # small e, circumflex accent
- "Egrave" => "È", # capital E, grave accent
- "egrave" => "è", # small e, grave accent
- "ETH" => "Eth", # capital Eth, Icelandic
- "eth" => "eth", # small eth, Icelandic
- "Euml" => "Ë", # capital E, dieresis or umlaut mark
- "euml" => "ë", # small e, dieresis or umlaut mark
- "Iacute" => "´I", # capital I, acute accent
- "iacute" => "í", # small i, acute accent
- "Icirc" => "Î", # capital I, circumflex accent
- "icirc" => "î", # small i, circumflex accent
- "Igrave" => "Ì", # capital I, grave accent
- "igrave" => "ì", # small i, grave accent
- "Iuml" => "Ï", # capital I, dieresis or umlaut mark
- "iuml" => "ï", # small i, dieresis or umlaut mark
- "Ntilde" => "Ñ", # capital N, tilde
- "ntilde" => "ñ", # small n, tilde
- "Oacute" => "´O", # capital O, acute accent
- "oacute" => "ó", # small o, acute accent
- "Ocirc" => "Ô", # capital O, circumflex accent
- "ocirc" => "ô", # small o, circumflex accent
- "Ograve" => "Ò", # capital O, grave accent
- "ograve" => "ò", # small o, grave accent
- "Oslash" => "Ø", # capital O, slash
- "oslash" => "ø", # small o, slash
- "Otilde" => "Õ", # capital O, tilde
- "otilde" => "õ", # small o, tilde
- "Ouml" => "Ö", # capital O, dieresis or umlaut mark
- "ouml" => "ö", # small o, dieresis or umlaut mark
- "szlig" => "ß", # small sharp s, German (sz ligature)
- "THORN" => "THORN", # capital THORN, Icelandic
- "thorn" => "thorn", # small thorn, Icelandic
- "Uacute" => "´U", # capital U, acute accent
- "uacute" => "ú", # small u, acute accent
- "Ucirc" => "Û", # capital U, circumflex accent
- "ucirc" => "û", # small u, circumflex accent
- "Ugrave" => "Ù", # capital U, grave accent
- "ugrave" => "ù", # small u, grave accent
- "Uuml" => "Ü", # capital U, dieresis or umlaut mark
- "uuml" => "ü", # small u, dieresis or umlaut mark
- "Yacute" => "´Y", # capital Y, acute accent
- "yacute" => "´y", # small y, acute accent
- "yuml" => "ÿ", # small y, dieresis or umlaut mark
-
- "lchevron" => "«", # left chevron (double less than)
- "rchevron" => "»", # right chevron (double greater than)
- );
- } else {
- %HTML_Escapes = (
- 'amp' => '&', # ampersand
- 'lt' => '<', # left chevron, less-than
- 'gt' => '>', # right chevron, greater-than
- 'quot' => '"', # double quote
-
- "Aacute" => "\xC1", # capital A, acute accent
- "aacute" => "\xE1", # small a, acute accent
- "Acirc" => "\xC2", # capital A, circumflex accent
- "acirc" => "\xE2", # small a, circumflex accent
- "AElig" => "\xC6", # capital AE diphthong (ligature)
- "aelig" => "\xE6", # small ae diphthong (ligature)
- "Agrave" => "\xC0", # capital A, grave accent
- "agrave" => "\xE0", # small a, grave accent
- "Aring" => "\xC5", # capital A, ring
- "aring" => "\xE5", # small a, ring
- "Atilde" => "\xC3", # capital A, tilde
- "atilde" => "\xE3", # small a, tilde
- "Auml" => "\xC4", # capital A, dieresis or umlaut mark
- "auml" => "\xE4", # small a, dieresis or umlaut mark
- "Ccedil" => "\xC7", # capital C, cedilla
- "ccedil" => "\xE7", # small c, cedilla
- "Eacute" => "\xC9", # capital E, acute accent
- "eacute" => "\xE9", # small e, acute accent
- "Ecirc" => "\xCA", # capital E, circumflex accent
- "ecirc" => "\xEA", # small e, circumflex accent
- "Egrave" => "\xC8", # capital E, grave accent
- "egrave" => "\xE8", # small e, grave accent
- "ETH" => "\xD0", # capital Eth, Icelandic
- "eth" => "\xF0", # small eth, Icelandic
- "Euml" => "\xCB", # capital E, dieresis or umlaut mark
- "euml" => "\xEB", # small e, dieresis or umlaut mark
- "Iacute" => "\xCD", # capital I, acute accent
- "iacute" => "\xED", # small i, acute accent
- "Icirc" => "\xCE", # capital I, circumflex accent
- "icirc" => "\xEE", # small i, circumflex accent
- "Igrave" => "\xCD", # capital I, grave accent
- "igrave" => "\xED", # small i, grave accent
- "Iuml" => "\xCF", # capital I, dieresis or umlaut mark
- "iuml" => "\xEF", # small i, dieresis or umlaut mark
- "Ntilde" => "\xD1", # capital N, tilde
- "ntilde" => "\xF1", # small n, tilde
- "Oacute" => "\xD3", # capital O, acute accent
- "oacute" => "\xF3", # small o, acute accent
- "Ocirc" => "\xD4", # capital O, circumflex accent
- "ocirc" => "\xF4", # small o, circumflex accent
- "Ograve" => "\xD2", # capital O, grave accent
- "ograve" => "\xF2", # small o, grave accent
- "Oslash" => "\xD8", # capital O, slash
- "oslash" => "\xF8", # small o, slash
- "Otilde" => "\xD5", # capital O, tilde
- "otilde" => "\xF5", # small o, tilde
- "Ouml" => "\xD6", # capital O, dieresis or umlaut mark
- "ouml" => "\xF6", # small o, dieresis or umlaut mark
- "szlig" => "\xDF", # small sharp s, German (sz ligature)
- "THORN" => "\xDE", # capital THORN, Icelandic
- "thorn" => "\xFE", # small thorn, Icelandic
- "Uacute" => "\xDA", # capital U, acute accent
- "uacute" => "\xFA", # small u, acute accent
- "Ucirc" => "\xDB", # capital U, circumflex accent
- "ucirc" => "\xFB", # small u, circumflex accent
- "Ugrave" => "\xD9", # capital U, grave accent
- "ugrave" => "\xF9", # small u, grave accent
- "Uuml" => "\xDC", # capital U, dieresis or umlaut mark
- "uuml" => "\xFC", # small u, dieresis or umlaut mark
- "Yacute" => "\xDD", # capital Y, acute accent
- "yacute" => "\xFD", # small y, acute accent
- "yuml" => "\xFF", # small y, dieresis or umlaut mark
-
- "lchevron" => "\xAB", # left chevron (double less than)
- "rchevron" => "\xBB", # right chevron (double greater than)
- );
- }
- }
-
- 1;
-