#!/usr/local/bin/perl
#entify.pl: Change Latin-1 high alphabetics to HTML entities for 7-bit safety.
#
# Typical use:
#
# perl entify.pl infile.8bit > outfile.html
#
# Input is read from the files named on the command line (or standard input
# if none are given) and treated as ISO 8859-1 (Latin-1) bytes.  Every
# character that has an entry in the entity table below is replaced by its
# named HTML entity; everything else is copied through unchanged.  Each
# output line is terminated by a newline, including a final line that was
# unterminated in the input.
#
# If you have Latin 1 characters in a URL, they should actually be escaped
# using the %-hex-digits convention; the program ignores this consideration.
#
# Copyright H. Churchyard 1995 -- freely redistributable.
# Version 1.0 12/30/94 -- Converted to perl. Included in htmlchek 4.0 release.
# Version 1.1 2/17/95 -- Eliminated warning, may not have been appropriate in
# all circumstances.
#
eval 'exec /usr/local/bin/perl -S $0 ${1+"$@"}'
    if 0; # this emulates #! processing on NIH machines.
# The string above is single-quoted on purpose: it is shell text, and must
# never be interpolated by perl ($* is a fatal error on modern perls).  The
# "if" stays on its own line so that a shell sees a complete eval command.

use strict;
use warnings;

# Latin-1 character (written as an octal escape so this source file itself
# stays 7-bit clean) => named HTML entity.  Covers the accented letters plus
# the registered and copyright signs.  \327 (multiplication sign) and \367
# (division sign) are intentionally absent and pass through unchanged.
my %entity_for = (
    "\300" => '&Agrave;', "\301" => '&Aacute;', "\302" => '&Acirc;',
    "\303" => '&Atilde;', "\304" => '&Auml;',   "\305" => '&Aring;',
    "\306" => '&AElig;',  "\307" => '&Ccedil;', "\310" => '&Egrave;',
    "\311" => '&Eacute;', "\312" => '&Ecirc;',  "\313" => '&Euml;',
    "\314" => '&Igrave;', "\315" => '&Iacute;', "\316" => '&Icirc;',
    "\317" => '&Iuml;',   "\320" => '&ETH;',    "\321" => '&Ntilde;',
    "\322" => '&Ograve;', "\323" => '&Oacute;', "\324" => '&Ocirc;',
    "\325" => '&Otilde;', "\326" => '&Ouml;',
    "\330" => '&Oslash;', "\331" => '&Ugrave;', "\332" => '&Uacute;',
    "\333" => '&Ucirc;',  "\334" => '&Uuml;',   "\335" => '&Yacute;',
    "\336" => '&THORN;',  "\337" => '&szlig;',  "\340" => '&agrave;',
    "\341" => '&aacute;', "\342" => '&acirc;',  "\343" => '&atilde;',
    "\344" => '&auml;',   "\345" => '&aring;',  "\346" => '&aelig;',
    "\347" => '&ccedil;', "\350" => '&egrave;', "\351" => '&eacute;',
    "\352" => '&ecirc;',  "\353" => '&euml;',   "\354" => '&igrave;',
    "\355" => '&iacute;', "\356" => '&icirc;',  "\357" => '&iuml;',
    "\360" => '&eth;',    "\361" => '&ntilde;', "\362" => '&ograve;',
    "\363" => '&oacute;', "\364" => '&ocirc;',  "\365" => '&otilde;',
    "\366" => '&ouml;',
    "\370" => '&oslash;', "\371" => '&ugrave;', "\372" => '&uacute;',
    "\373" => '&ucirc;',  "\374" => '&uuml;',   "\375" => '&yacute;',
    "\376" => '&thorn;',  "\377" => '&yuml;',
    "\256" => '&reg;',    "\251" => '&copy;',
);

# One character class matching exactly the characters that have a table
# entry.  It is derived from the table itself so the matcher can never drift
# out of step with it (a hand-written range such as [\300-\377] would miss
# \251 and \256 and would wrongly include \327 and \367).
my $entity_chars = join '', sort keys %entity_for;
my $entity_re    = qr/([\Q$entity_chars\E])/;

# entify_line($line): return a copy of $line in which every character that
# has an entry in %entity_for is replaced by its HTML entity.  All other
# characters, including any line terminator, are returned untouched.
sub entify_line {
    my ($line) = @_;
    $line =~ s/$entity_re/$entity_for{$1}/g;
    return $line;
}

#
# Main
#
# Filter the input line by line.  The record separator is stripped when it
# is present and a newline is always written back, so an unterminated last
# line comes out terminated.
sub main {
    while (my $line = <>) {
        chomp $line;
        my $converted = entify_line($line);
        print "$converted\n";
    }
    return;
}

# Run the filter only when executed as a program, not when this file is
# loaded by other code via require/do.
main() unless caller;
##EOF