home *** CD-ROM | disk | FTP | other *** search
- #!/usr/bin/perl
- eval 'exec /usr/bin/perl -S $0 ${1+"$@"}'
- if $running_under_some_shell;
- #!./perl
- # $Id: piconv,v 2.1 2004/10/06 05:07:20 dankogai Exp $
- #
- use 5.8.0;
- use strict;
- use Encode ;
- use Encode::Alias;
- my %Scheme = map {$_ => 1} qw(from_to decode_encode perlio);
-
- use File::Basename;
- my $name = basename($0);
-
- use Getopt::Long qw(:config no_ignore_case);
-
- my %Opt;
-
- help()
- unless
- GetOptions(\%Opt,
- 'from|f=s',
- 'to|t=s',
- 'list|l',
- 'string|s=s',
- 'check|C=i',
- 'c',
- 'perlqq|p',
- 'debug|D',
- 'scheme|S=s',
- 'resolve|r=s',
- 'help',
- );
-
- $Opt{help} and help();
- $Opt{list} and list_encodings();
- my $locale = $ENV{LC_CTYPE} || $ENV{LC_ALL} || $ENV{LANG};
- defined $Opt{resolve} and resolve_encoding($Opt{resolve});
- $Opt{from} || $Opt{to} || help();
- my $from = $Opt{from} || $locale or help("from_encoding unspecified");
- my $to = $Opt{to} || $locale or help("to_encoding unspecified");
- $Opt{string} and Encode::from_to($Opt{string}, $from, $to) and print $Opt{string} and exit;
- my $scheme = exists $Scheme{$Opt{Scheme}} ? $Opt{Scheme} : 'from_to';
- $Opt{check} ||= $Opt{c};
- $Opt{perlqq} and $Opt{check} = Encode::FB_PERLQQ;
-
- if ($Opt{debug}){
- my $cfrom = Encode->getEncoding($from)->name;
- my $cto = Encode->getEncoding($to)->name;
- print <<"EOT";
- Scheme: $scheme
- From: $from => $cfrom
- To: $to => $cto
- EOT
- }
-
- # we do not use <> (or ARGV) for the sake of binmode()
- @ARGV or push @ARGV, \*STDIN;
-
- unless ($scheme eq 'perlio'){
- binmode STDOUT;
- for my $argv (@ARGV){
- my $ifh = ref $argv ? $argv : undef;
- $ifh or open $ifh, "<", $argv or next;
- binmode $ifh;
- if ($scheme eq 'from_to'){ # default
- while(<$ifh>){
- Encode::from_to($_, $from, $to, $Opt{check});
- print;
- }
- }elsif ($scheme eq 'decode_encode'){ # step-by-step
- while(<$ifh>){
- my $decoded = decode($from, $_, $Opt{check});
- my $encoded = encode($to, $decoded);
- print $encoded;
- }
- } else { # won't reach
- die "$name: unknown scheme: $scheme";
- }
- }
- }else{
- # NI-S favorite
- binmode STDOUT => "raw:encoding($to)";
- for my $argv (@ARGV){
- my $ifh = ref $argv ? $argv : undef;
- $ifh or open $ifh, "<", $argv or next;
- binmode $ifh => "raw:encoding($from)";
- print while(<$ifh>);
- }
- }
-
- sub list_encodings{
- print join("\n", Encode->encodings(":all")), "\n";
- exit 0;
- }
-
- sub resolve_encoding {
- if (my $alias = Encode::resolve_alias($_[0])) {
- print $alias, "\n";
- exit 0;
- } else {
- warn "$name: $_[0] is not known to Encode\n";
- exit 1;
- }
- }
-
- sub help{
- my $message = shift;
- $message and print STDERR "$name error: $message\n";
- print STDERR <<"EOT";
- $name [-f from_encoding] [-t to_encoding] [-s string] [files...]
- $name -l
- $name -r encoding_alias
- -l,--list
- lists all available encodings
- -r,--resolve encoding_alias
- resolve encoding to its (Encode) canonical name
- -f,--from from_encoding
- when omitted, the current locale will be used
- -t,--to to_encoding
- when omitted, the current locale will be used
- -s,--string string
- "string" will be the input instead of STDIN or files
- The following are mainly of interest to Encode hackers:
- -D,--debug show debug information
- -C N | -c | -p check the validity of the input
- -S,--scheme scheme use the scheme for conversion
- EOT
- exit;
- }
-
- __END__
-
- =head1 NAME
-
- piconv -- iconv(1), reinvented in perl
-
- =head1 SYNOPSIS
-
- piconv [-f from_encoding] [-t to_encoding] [-s string] [files...]
- piconv -l
- piconv [-C N|-c|-p]
- piconv -S scheme ...
- piconv -r encoding
- piconv -D ...
- piconv -h
-
- =head1 DESCRIPTION
-
- B<piconv> is perl version of B<iconv>, a character encoding converter
- widely available for various Unixen today. This script was primarily
- a technology demonstrator for Perl 5.8.0, but you can use piconv in the
- place of iconv for virtually any case.
-
- piconv converts the character encoding of either STDIN or files
- specified in the argument and prints out to STDOUT.
-
- Here is the list of options. Each option can be in short format (-f)
- or long (--from).
-
- =over 4
-
- =item -f,--from from_encoding
-
- Specifies the encoding you are converting from. Unlike B<iconv>,
- this option can be omitted. In such cases, the current locale is used.
-
- =item -t,--to to_encoding
-
- Specifies the encoding you are converting to. Unlike B<iconv>,
- this option can be omitted. In such cases, the current locale is used.
-
- Therefore, when both -f and -t are omitted, B<piconv> just acts
- like B<cat>.
-
- =item -s,--string I<string>
-
- uses I<string> instead of file for the source of text.
-
- =item -l,--list
-
- Lists all available encodings, one per line, in case-insensitive
- order. Note that only the canonical names are listed; many aliases
- exist. For example, the names are case-insensitive, and many standard
- and common aliases work, such as "latin1" for "ISO-8859-1", or "ibm850"
- instead of "cp850", or "winlatin1" for "cp1252". See L<Encode::Supported>
- for a full discussion.
-
- =item -C,--check I<N>
-
- Check the validity of the stream if I<N> = 1. When I<N> = -1, something
- interesting happens when it encounters an invalid character.
-
- =item -c
-
- Same as C<-C 1>.
-
- =item -p,--perlqq
-
- Same as C<-C -1>.
-
- =item -h,--help
-
- Show usage.
-
- =item -D,--debug
-
- Invokes debugging mode. Primarily for Encode hackers.
-
- =item -S,--scheme scheme
-
- Selects which scheme is to be used for conversion. Available schemes
- are as follows:
-
- =over 4
-
- =item from_to
-
- Uses Encode::from_to for conversion. This is the default.
-
- =item decode_encode
-
- Input strings are decode()d then encode()d. A straight two-step
- implementation.
-
- =item perlio
-
- The new perlIO layer is used. NI-S' favorite.
-
- =back
-
- Like the I<-D> option, this is also for Encode hackers.
-
- =back
-
- =head1 SEE ALSO
-
- L<iconv/1>
- L<locale/3>
- L<Encode>
- L<Encode::Supported>
- L<Encode::Alias>
- L<PerlIO>
-
- =cut
-