home *** CD-ROM | disk | FTP | other *** search
- package HTML::FormatPS;
-
- # $Id: FormatPS.pm,v 1.21 1997/08/16 10:34:31 aas Exp $
-
- $DEFAULT_PAGESIZE = "A4";
-
- =head1 NAME
-
- HTML::FormatPS - Format HTML as postscript
-
- =head1 SYNOPSIS
-
- require HTML::FormatPS;
- $html = parse_htmlfile("test.html");
- $formatter = new HTML::FormatPS
- FontFamily => 'Helvetica',
- PaperSize => 'Letter';
- print $formatter->format($html);
-
- =head1 DESCRIPTION
-
- The HTML::FormatPS is a formatter that outputs PostScript code.
- Formatting of HTML tables and forms is not implemented.
-
- You might specify the following parameters when constructing the formatter:
-
- =over 4
-
- =item PaperSize
-
- What kind of paper should we format for. The value can be one of
- these: A3, A4, A5, B4, B5, Letter, Legal, Executive, Tabloid,
- Statement, Folio, 10x14, Quarto.
-
- The default is "A4".
-
- =item PaperWidth
-
- The width of the paper in points. Setting PaperSize also defines this
- value.
-
- =item PaperHeight
-
- The height of the paper in points. Setting PaperSize also defines
- this value.
-
- =item LeftMargin
-
- The left margin in points.
-
- =item RightMargin
-
- The right margin in points.
-
- =item HorizontalMargin
-
- Both left and right margin at the same time. The default value is 4 cm.
-
- =item TopMargin
-
- The top margin in points.
-
- =item BottomMargin
-
- The bottom margin in points.
-
- =item VerticalMargin
-
- Both top and bottom margin at the same time. The default value is 2 cm.
-
- =item PageNo
-
- The parameter determines if we should put page numbers on the pages.
- The default is yes, so you have to set this value to 0 in order to
- suppress page numbers.
-
- =item FontFamily
-
- The parameter specifies which family of fonts to use for the formatting.
- Legal values are "Courier", "Helvetica" and "Times". The default is
- "Times".
-
- =item FontScale
-
- All fontsizes might be scaled by this factor.
-
- =item Leading
-
- How much space between lines. This is a factor of the fontsize used
- for that line. Default is 0.1.
-
- =back
-
- =head1 SEE ALSO
-
- L<HTML::Formatter>
-
- =head1 COPYRIGHT
-
- Copyright (c) 1995 Gisle Aas. All rights reserved.
-
- This library is free software; you can redistribute it and/or
- modify it under the same terms as Perl itself.
-
- =head1 AUTHOR
-
- Gisle Aas <aas@oslonett.no>
-
- =cut
-
- use Carp;
-
- require HTML::Formatter;
- @ISA = qw(HTML::Formatter);
-
- # A few routines that convert lengths into points
- sub mm { $_[0] * 72 / 25.4; }
- sub in { $_[0] * 72; }
-
- %PaperSizes =
- (
- A3 => [mm(297), mm(420)],
- A4 => [mm(210), mm(297)],
- A5 => [mm(148), mm(210)],
- B4 => [729, 1032 ],
- B5 => [516, 729 ],
- Letter => [in(8.5), in(11) ],
- Legal => [in(8.5), in(14) ],
- Executive => [in(7.5), in(10) ],
- Tabloid => [in(11), in(17) ],
- Statement => [in(5.5), in(8.5)],
- Folio => [in(8.5), in(13) ],
- "10x14" => [in(10), in(14) ],
- Quarto => [610, 780 ],
- );
-
- %FontFamilies =
- (
- Courier => [qw(Courier
- Courier-Bold
- Courier-Oblique
- Courier-BoldOblique)],
-
- Helvetica => [qw(Helvetica
- Helvetica-Bold
- Helvetica-Oblique
- Helvetica-BoldOblique)],
-
- Times => [qw(Times-Roman
- Times-Bold
- Times-Italic
- Times-BoldItalic)],
- );
-
- # size 0 1 2 3 4 5 6 7
- @FontSizes = ( 5, 6, 8, 10, 12, 14, 18, 24, 32);
-
- sub BOLD { 0x01; }
- sub ITALIC { 0x02; }
-
- %param =
- (
- papersize => 'papersize',
- paperwidth => 'paperwidth',
- paperheight => 'paperheigth',
- leftmargin => 'lmW',
- rightmargin => 'rmW',
- horizontalmargin => 'mW',
- topmargin => 'tmH',
- bottommargin => 'bmH',
- verticalmargin => 'mH',
- pageno => 'printpageno',
- fontfamily => 'family',
- fontscale => 'fontscale',
- leading => 'leading',
- );
-
-
- sub new
- {
- my $class = shift;
-
- # Set up defaults
- my $self = bless {
- family => "Times",
- mH => mm(40),
- mW => mm(20),
- printpageno => 1,
- fontscale => 1,
- leading => 0.1,
- }, $class;
- $self->papersize($DEFAULT_PAGESIZE);
-
- # Parse constructor arguments (might override defaults)
- while (($key, $val) = splice(@_, 0, 2)) {
- $key = lc $key;
- croak "Illegal parameter ($key => $val)" unless exists $param{$key};
- $key = $param{$key};
- {
- $key eq "family" && do {
- $val = "\u\L$val";
- croak "Unknown font family ($val)"
- unless exists $FontFamilies{$val};
- $self->{family} = $val;
- last;
- };
- $key eq "papersize" && do {
- $self->papersize($val) || croak "Unknown papersize ($val)";
- last;
- };
- $self->{$key} = lc $val;
- }
- }
- $self->{title} = "";
- $self;
- }
-
-
- sub papersize
- {
- my($self, $val) = @_;
- $val = "\u\L$val";
- my($width, $height) = @{$PaperSizes{$val}};
- return 0 unless defined $width;
- $self->{papersize} = $val;
- $self->{paperwidth} = $width;
- $self->{paperheight} = $height;
- 1;
- }
-
-
- sub fontsize
- {
- my $self = shift;
- my $size = $self->{font_size}[-1];
- $size = 8 if $size > 8;
- $size = 3 if $size < 0;
- $FontSizes[$size] * $self->{fontscale};
- }
-
-
- sub findfont
- {
- my($self, $plain_with_size) = @_;
- my $index = 0;
- my $family = $self->{family} || 'Times';
- my $size = $plain_with_size;
- unless ($plain_with_size) {
- $index |= BOLD if $self->{bold};
- $index |= ITALIC if $self->{italic} || $self->{underline};
- $family = 'Courier' if $self->{teletype};
- $size = $self->fontsize;
- }
- my $font = $FontFamilies{$family}[$index];
- my $font_with_size = "$font-$size";
- if ($self->{currentfont} eq $font_with_size) {
- return "";
- }
- $self->{currentfont} = $font_with_size;
- $self->{pointsize} = $size;
- my $fontmod = "Font::Metrics::$font";
- $fontmod =~ s/-//g;
- my $fontfile = $fontmod . ".pm";
- $fontfile =~ s,::,/,g;
- require $fontfile;
- $self->{wx} = \@{ "${fontmod}::wx" };
- $font = $self->{fonts}{$font_with_size} || do {
- my $fontID = "F" . ++$self->{fno};
- $self->{fonts}{$font_with_size} = $fontID;
- $fontID;
- };
- "$font SF";
- }
-
-
- sub width
- {
- my $self = shift;
- my $w = 0;
- my $wx = $self->{wx};
- my $sz = $self->{pointsize};
- for (unpack("C*", $_[0])) {
- $w += $wx->[$_] * $sz;
- }
- $w;
- }
-
-
- sub begin
- {
- my $self = shift;
- $self->HTML::Formatter::begin;
-
- # Margins is points
- $self->{lm} = $self->{lmW} || $self->{mW};
- $self->{rm} = $self->{paperwidth} - ($self->{rmW} || $self->{mW});
- $self->{tm} = $self->{paperheight} - ($self->{tmH} || $self->{mH});
- $self->{bm} = $self->{bmH} || $self->{mH};
-
- # Font setup
- $self->{fno} = 0;
- $self->{fonts} = {};
- $self->{en} = 0.55 * $self->fontsize(3);
-
- # Initial position
- $self->{xpos} = $self->{lm}; # top of the current line
- $self->{ypos} = $self->{tm};
-
- $self->{pageno} = 1;
-
- $self->{line} = "";
- $self->{showstring} = "";
- $self->{currentfont} = "";
- $self->{prev_currentfont} = "";
- $self->{largest_pointsize} = 0;
-
- $self->newpage;
- }
-
-
- sub end
- {
- my $self = shift;
- $self->showline;
- $self->endpage if $self->{out};
- my $pages = $self->{pageno} - 1;
-
- my @prolog = ();
- push(@prolog, "%!PS-Adobe-3.0\n");
- #push(@prolog,"%%Title: No title\n"); # should look for the <title> element
- push(@prolog, "%%Creator: HTML::FormatPS (libwww-perl)\n");
- push(@prolog, "%%CreationDate: " . localtime() . "\n");
- push(@prolog, "%%Pages: $pages\n");
- push(@prolog, "%%PageOrder: Ascend\n");
- push(@prolog, "%%Orientation: Portrait\n");
- my($pw, $ph) = map { int($_); } @{$self}{qw(paperwidth paperheight)};
-
- push(@prolog, "%%DocumentMedia: Plain $pw $ph 0 white ()\n");
- push(@prolog, "%%DocumentNeededResources: \n");
- my($full, %seenfont);
- for $full (sort keys %{$self->{fonts}}) {
- $full =~ s/-\d+$//;
- next if $seenfont{$full}++;
- push(@prolog, "%%+ font $full\n");
- }
- push(@prolog, "%%DocumentSuppliedResources: procset newencode 1.0 0\n");
- push(@prolog, "%%+ encoding ISOLatin1Encoding\n");
- push(@prolog, "%%EndComments\n");
- push(@prolog, <<'EOT');
-
- %%BeginProlog
- /S/show load def
- /M/moveto load def
- /SF/setfont load def
-
- %%BeginResource: encoding ISOLatin1Encoding
- systemdict /ISOLatin1Encoding known not {
- /ISOLatin1Encoding [
- /space /space /space /space /space /space /space /space
- /space /space /space /space /space /space /space /space
- /space /space /space /space /space /space /space /space
- /space /space /space /space /space /space /space /space
- /space /exclam /quotedbl /numbersign /dollar /percent /ampersand
- /quoteright
- /parenleft /parenright /asterisk /plus /comma /minus /period /slash
- /zero /one /two /three /four /five /six /seven
- /eight /nine /colon /semicolon /less /equal /greater /question
- /at /A /B /C /D /E /F /G
- /H /I /J /K /L /M /N /O
- /P /Q /R /S /T /U /V /W
- /X /Y /Z /bracketleft /backslash /bracketright /asciicircum /underscore
- /quoteleft /a /b /c /d /e /f /g
- /h /i /j /k /l /m /n /o
- /p /q /r /s /t /u /v /w
- /x /y /z /braceleft /bar /braceright /asciitilde /space
- /space /space /space /space /space /space /space /space
- /space /space /space /space /space /space /space /space
- /dotlessi /grave /acute /circumflex /tilde /macron /breve /dotaccent
- /dieresis /space /ring /cedilla /space /hungarumlaut /ogonek /caron
- /space /exclamdown /cent /sterling /currency /yen /brokenbar /section
- /dieresis /copyright /ordfeminine /guillemotleft /logicalnot /hyphen
- /registered /macron
- /degree /plusminus /twosuperior /threesuperior /acute /mu /paragraph
- /periodcentered
- /cedillar /onesuperior /ordmasculine /guillemotright /onequarter
- /onehalf /threequarters /questiondown
- /Agrave /Aacute /Acircumflex /Atilde /Adieresis /Aring /AE /Ccedilla
- /Egrave /Eacute /Ecircumflex /Edieresis /Igrave /Iacute /Icircumflex
- /Idieresis
- /Eth /Ntilde /Ograve /Oacute /Ocircumflex /Otilde /Odieresis /multiply
- /Oslash /Ugrave /Uacute /Ucircumflex /Udieresis /Yacute /Thorn
- /germandbls
- /agrave /aacute /acircumflex /atilde /adieresis /aring /ae /ccedilla
- /egrave /eacute /ecircumflex /edieresis /igrave /iacute /icircumflex
- /idieresis
- /eth /ntilde /ograve /oacute /ocircumflex /otilde /odieresis /divide
- /oslash /ugrave /uacute /ucircumflex /udieresis /yacute /thorn
- /ydieresis
- ] def
- } if
- %%EndResource
- %%BeginResource: procset newencode 1.0 0
- /NE { %def
- findfont begin
- currentdict dup length dict begin
- { %forall
- 1 index/FID ne {def} {pop pop} ifelse
- } forall
- /FontName exch def
- /Encoding exch def
- currentdict dup
- end
- end
- /FontName get exch definefont pop
- } bind def
- %%EndResource
- %%EndProlog
- EOT
-
- push(@prolog, "\n%%BeginSetup\n");
- for $full (sort keys %{$self->{fonts}}) {
- my $short = $self->{fonts}{$full};
- $full =~ s/-(\d+)$//;
- my $size = $1;
- push(@prolog, "ISOLatin1Encoding/$full-ISO/$full NE\n");
- push(@prolog, "/$short/$full-ISO findfont $size scalefont def\n");
- }
- push(@prolog, "%%EndSetup\n");
-
- $self->collect("\n%%Trailer\n%%EOF\n");
- unshift(@{$self->{output}}, @prolog);
- }
-
-
- sub header_start
- {
- my($self, $level, $node) = @_;
- # If we are close enough to be bottom of the page, start a new page
- # instead of this:
- $self->vspace(1 + (6-$level) * 0.4);
- $self->eat_leading_space;
- $self->{bold}++;
- push(@{$self->{font_size}}, 8 - $level);
- 1;
- }
-
-
- sub header_end
- {
- my($self, $level, $node) = @_;
- $self->vspace(1);
- $self->{bold}--;
- pop(@{$self->{font_size}});
- 1;
- }
-
- sub hr_start
- {
- my $self = shift;
- $self->showline;
- $self->vspace(0.5);
- $self->skip_vspace;
- my $lm = $self->{lm};
- my $rm = $self->{rm};
- my $y = $self->{ypos};
- $self->collect(sprintf "newpath %.1f %.1f M %.1f %.1f lineto stroke\n",
- $lm, $y, $rm, $y);
- $self->vspace(0.5);
- }
-
-
- sub skip_vspace
- {
- my $self = shift;
- if (defined $self->{vspace}) {
- $self->showline;
- if ($self->{out}) {
- $self->{ypos} -= $self->{vspace} * 10 * $self->{fontscale};
- if ($self->{ypos} < $self->{bm}) {
- $self->newpage;
- }
- }
- $self->{xpos} = $self->{lm};
- $self->{vspace} = undef;
- }
- }
-
-
- sub show
- {
- my $self = shift;
- my $str = $self->{showstring};
- return unless length $str;
- $str =~ s/([\(\)\\])/\\$1/g; # must escape parentesis
- $self->{line} .= "($str)S\n";
- $self->{showstring} = "";
- }
-
-
- sub showline
- {
- my $self = shift;
- $self->show;
- my $line = $self->{line};
- return unless length $line;
- $self->{ypos} -= $self->{largest_pointsize} || $self->{pointsize};
- if ($self->{ypos} < $self->{bm}) {
- $self->newpage;
- $self->{ypos} -= $self->{pointsize};
- # must set current font again
- my $font = $self->{prev_currentfont};
- if ($font) {
- $self->collect("$self->{fonts}{$font} SF\n");
- }
- }
- my $lm = $self->{lm};
- my $x = $lm;
- if ($self->{center}) {
- # Unfortunately, the center attribute is gone when we get here,
- # so this code is never activated
- my $linewidth = $self->{xpos} - $lm;
- $x += ($self->{rm} - $lm - $linewidth) / 2;
- }
-
- $self->collect(sprintf "%.1f %.1f M\n", $x, $self->{ypos}); # moveto
- $line =~ s/\s\)S$/)S/; # many lines will end with space
- $self->collect($line);
-
- if ($self->{bullet}) {
- # Putting this behind the first line of the list item
- # makes it more likely that we get the right font. We should
- # really set the font that we want to use.
- my $bullet = $self->{bullet};
- if ($bullet eq '*') {
- # There is no character that is really suitable. Lets make
- # filled cirle ourself.
- my $radius = $self->{pointsize} / 4;
- $self->collect(sprintf "newpath %.1f %.1f %.1f 0 360 arc fill\n",
- $self->{bullet_pos} + $radius,
- $self->{ypos} + $radius, $radius);
- } else {
- $self->collect(sprintf "%.1f %.1f M\n", # moveto
- $self->{bullet_pos},
- $self->{ypos});
- $self->collect("($bullet)S\n");
- }
- $self->{bullet} = '';
-
- }
-
- $self->{prev_currentfont} = $self->{currentfont};
- $self->{largest_pointsize} = 0;
- $self->{line} = "";
- $self->{xpos} = $lm;
- # Additional linespacing
- $self->{ypos} -= $self->{leading} * $self->{pointsize};
- }
-
-
- sub endpage
- {
- my $self = shift;
- # End previous page
- $self->collect("showpage\n");
- $self->{pageno}++;
- }
-
-
- sub newpage
- {
- my $self = shift;
- if ($self->{'out'}) {
- $self->endpage;
- }
- $self->{'out'} = 0;
- my $pageno = $self->{pageno};
- $self->collect("\n%%Page: $pageno $pageno\n");
-
- # Print area marker (just for debugging)
- if ($DEBUG) {
- my($llx, $lly, $urx, $ury) = map { sprintf "%.1f", $_}
- @{$self}{qw(lm bm rm tm)};
- $self->collect("gsave 0.1 setlinewidth\n");
- $self->collect("clippath 0.9 setgray fill 1 setgray\n");
- $self->collect("$llx $lly moveto $urx $lly lineto $urx $ury lineto $llx $ury lineto closepath fill\n");
- $self->collect("grestore\n");
- }
-
- # Print page number
- if ($self->{printpageno}) {
- $self->collect("%% Title and pageno\n");
- my $f = $self->findfont(8);
- $self->collect("$f\n") if $f;
- my $x = $self->{paperwidth};
- if ($x) { $x -= 30; } else { $x = 30; }
- $self->collect(sprintf "%.1f 30.0 M($pageno)S\n", $x);
- $x = $self->{lm};
- $self->collect(sprintf "%.1f 30.0 M($self->{title})S\n", $x);
- }
- $self->collect("\n");
-
- $self->{xpos} = $self->{lm};
- $self->{ypos} = $self->{tm};
- }
-
-
- sub out
- {
- my($self, $text) = @_;
- if ($self->{collectingTheTitle}) {
- # Both collect and print the title
- $text =~ s/([\(\)\\])/\\$1/g; # Escape parens.
- $self->{title} .= $text;
- return;
- }
- $self->skip_vspace;
-
- my $font = $self->findfont();
- if (length $font) {
- $self->show;
- $self->{line} .= "$font\n";
- }
- my $w = $self->width($text);
- if ($self->{xpos} + $w > $self->{rm}) {
- $self->showline;
- return if $text =~ /^\s*$/;
- };
- $self->{xpos} += $w;
- $self->{showstring} .= $text;
- $self->{largest_pointsize} = $self->{pointsize}
- if $self->{largest_pointsize} < $self->{pointsize};
- $self->{'out'}++;
- }
-
-
- sub pre_out
- {
- my($self, $text) = @_;
- $self->skip_vspace;
- $self->tt_start;
- my $font = $self->findfont();
- if (length $font) {
- $self->show;
- $self->{line} .= "$font\n";
- }
- while ($text =~ s/(.*)\n//) {
- $self->{'out'}++;
- $self->{showstring} .= $1;
- $self->showline;
- }
- $self->{showstring} .= $text;
- $self->tt_end;
- }
-
- sub bullet
- {
- my($self, $bullet) = @_;
- $self->{bullet} = $bullet;
- $self->{bullet_pos} = $self->{lm};
- }
-
- sub adjust_lm
- {
- my $self = shift;
- $self->showline;
- $self->{lm} += $_[0] * $self->{en};
- }
-
-
- sub adjust_rm
- {
- my $self = shift;
- $self->showline;
- $self->{rm} += $_[0] * $self->{en};
- }
-
- sub head_start {
- 1;
- }
-
- sub head_end {
- 1;
- }
-
- sub title_start {
- my($self) = @_;
- $self->{collectingTheTitle} = 1;
- 1;
- }
-
- sub title_end {
- my($self) = @_;
- $self->{collectingTheTitle} = 0;
- 1;
- }
-
- 1;
-