home *** CD-ROM | disk | FTP | other *** search
- #!/bin/perl
- @REM=("
- @perl -S %0.bat %1 %2 %3 %4 %5 %6 %7 %8 %9
- @goto end ") if 0 ;
- #
- # Fix and convert tape archives
- #
- # (C) Copyright 1990 Diomidis Spinellis. All rights reserved.
- #
- # Can be copied and used as long as the copyright notice is retained.
- # Modified copies must be marked as such.
- # This is a beta release. Use this at your own risk.
- # I would appreciate feedback on bugs, improvements etc.
- #
- # dds@cc.ic.ac.uk
- #
- # This program IS NOT EFFICIENT.
- # A have tried instead to make it readable, flexible and easy to use.
- # For this I use lots of subroutines, local variables and higher order
- # functions. If you want something efficient rewrite it in C.
- #
- # You can very easily add a new conversion mode. Just add the conversion
- # function. If the conversion function needs to be applied to every
- # filename component add a map function and a function to prepare a component
- # to have acounter added to it.
- # If you need to convert another part of the header (e.g. uid) search for the
- # string HEADERMOD in this file.
-
- do 'getopts.pl' || die "$0: Unable to find getopts library: $!\n";
- &Getopts('mncaf:t:s:') || $usage++;
-
- $count = 0;
- $proc = 'copy';
-
- if ($opt_c) {
- $proc = 'canonic';
- $count++;
- }
- if ($opt_a) {
- $proc = 'noabs';
- $count++;
- }
- if ($opt_f) {
- $proc = 'from_' . $opt_f;
- $count++;
- }
- if ($opt_t) {
- $proc = 'to_' . $opt_t;
- $count++;
- }
- if ($opt_s) {
- $proc = 'usershort';
- $usershortlen = $opt_s;
- $count++;
- }
-
- if ($count > 1) {
- print STDERR "$0: Only one of -c -a -f -t -s can be specified\n";
- $usage++;
- }
-
- if (! eval('&' . $proc . '("foo");')) {
- print STDERR "$0: Bad option specified $proc\n";
- $usage++;
- }
- delete $map{'foo'};
-
-
- if ($usage) {
- print STDERR "Usage: $0 -n -c -a -s N -f msdos|vms -t msdos|v7";
- print STDERR "
- -n Do not work on tar files. Read and print a list of file names.
- -c Canonicalise filenames by removing /../, /./ and //.
- -a Fix absolute filenames by removing leading /.
- -s Convert filenames to the specified length N.
- -f Convert from format. Format can be msdos or vms.
- -t Convert to format. Format can be v7 (7th Edition) or msdos.
- -m Print a map table containing initial and final name on stderr.
-
- Only one of -c -a -f -t -s can be specified.\n";
- exit 1;
- }
-
- if ($opt_n) {
- while (<>) {
- s/\n$//;
- print &$proc($_), "\n";
- }
- } else {
- ©tar();
- }
-
- if ($opt_m) {
- while (($from, $to) = each(%map)) {
- print STDERR "$from $to\n";
- }
- }
-
- exit 0;
-
- # Remove absolute file names.
- # We canonicalise since foo//bar is /bar on many Unixes
- sub noabs {
- local($name) = $_[0];
-
- $name = &canonic($name);
- $name =~ s/^\///;
- return $name;
- }
-
- # Convert to MS-DOS
- # - Shorten name to 8 characters
- # - Remove all dots, but the last one
- # - Shorten extension to 3 characters
- # - Convert ,=+<>|; *?:[]\" to ^
- # - Convert device name (con, aux ...) to _device
- sub to_msdos {
- local($nm);
-
- $nm = $_[0];
- return &filemap($nm, 'mapmsdos', 'countprepmsdos');
- # The following line fails on perl 3.0 patchlevel 18 XXX
- # return &filemap($_[0], 'mapmsdos', 'countprepmsdos');
- }
-
- # Shortening routine for MS-DOS
- sub mapmsdos {
- local($name) = $_[0];
- local($ext);
-
- # Leave only the last dot
- while ($name =~ s/(.*)\.(.*)\.(.*)/\1_\2.\3/g) {
- ;
- }
- # Convert funny characters to ^
- $name =~ s/[,=+<>|; *?:[\]\\]/^/g;
- # Shorten the name
- if ($name =~ m/\./) {
- ($name, $ext) = split(/\./, $name);
- $name =~ s/^((con)|(aux[1-4]?)|(prn)|(lpt[1-3])|(clock\$))$/_\1/i;
- return &shorten($name, 8) . '.' . &shorten($ext, 3);
- } else {
- $name =~ s/^((con)|(aux[1-4]?)|(prn)|(lpt[1-3])|(clock\$))$/_\1/i;
- return &shorten($name, 8);
- }
- }
-
- # Count preparation routine for MS-DOS
- sub countprepmsdos {
- local($name) = $_[0];
- local($ext);
-
- if ($name =~ m/\./) {
- ($name, $ext) = split(/\./, $name);
- return $name . '.' . substr($ext, 0, 1);
- } else {
- return $name . '.';
- }
- }
-
-
- # Convert to 7th Edition type filesystems
- # - Shorten filenames to 14 characters
- sub to_v7 {
- local($nm);
-
- $nm = $_[0];
- return &filemap($nm, 'mapv7', 'countprepv7');
- }
-
- # Shortening routine for V7
- sub mapv7 {
- return &shorten($_[0], 14);
- }
-
- # Count preparation routine for V7
- sub countprepv7 {
- return substr($_[0], 0, 12);
- }
-
- # Shorten the filename components by a user specified amount
- sub usershort {
- local($nm);
-
- $nm = $_[0];
- return &filemap($nm, 'mapusershort', 'countprepusershort');
- }
-
- # Shortening routine for usershort
- sub mapusershort {
- return &shorten($_[0], $usershortlen);
- }
-
- # Count preparation routine for usershort
- sub countprepusershort {
- return substr($_[0], 0, $usershortlen);
- }
-
- # Convert from VMS
- # - Convert uppercase to lowercase
- # - Remove leading device name: or node::
- # - Convert directory form [xxx] to xxx/
- # - Remove trailing generation number
- # - Remove quoting characters ^V and " (XXX)
- # NOTE: I am an ignorant on VMS, so this probably need fixing. UNTESTED
- sub from_vms {
- local($name) = $_[0];
-
- $name =~ tr/[A-Z]\\/[a-z]\//;
- $name =~ s/^[a-z]*::?//;
- $name =~ s/\[(.*)\](.*)/\1\/\2/;
- $name =~ s/;[0-9]+$//;
- $name =~ s/["\026]//g;
- return $name;
- }
-
- # Convert from MS-DOS
- # - Convert \ to /
- # - Convert uppercase to lowercase
- # - Remove leading device names
- sub from_msdos {
- local($name) = $_[0];
-
- $name =~ tr/[A-Z]\\/[a-z]\//;
- $name =~ s/^[a-z]://;
- return $name;
- }
-
- # filemap(name, mapfunc, countprepfunc)
- # Go through every path element of name substituting it with the result
- # of mapfunc(element). If the filename is already used then substitute it
- # with the result of applying countprepfunc to with a two letter counter
- # appended.
- # Two associative arrays are kept to avoid the chance of re-using a name
- # %map contains the mappings from big names to small names
- # %used contains 1 for every short name that has been used
- # We keep partial file names to speed up the process
- # The filenames are always canonicalised
- sub filemap {
- local(
- @big, # Contains components of original
- @small, # Result is built in here
- @bigpart, # Part of big that has been done
- @s, # To try alternative mappings
- $name, # Part of path we are dealing with
- $count, # To create distinct names
- $try, # Remember map result
- $mapfunc, # Function to create new elements
- $countprepfunc # Function to prepare for counting
-
- );
-
- $mapfunc = $_[1];
- $countprepfunc = $_[2];
- @big = split(/\//, &canonic($_[0]));
- @small = @bigpart = ();
- while (defined($name = shift(@big))) {
- push(@bigpart, $name);
- if (defined($try = $map{join('/', @bigpart)})) {
- # Found in map
- @small = split(/\//, $try);
- # The next line is needed because of buggy split
- # split(/x/, '') should give ('') not ()
- @small = ('') if $#small == -1;
- } else {
- # Create new map
- # Even if the name is short we may have used it up
- # by shortening up a bigger one, so we may have to
- # count
- $name = &$mapfunc($name);
- $count = '';
- while ($used{join('/', @s = (@small, $name . $count))}) {
- if ($count eq '') {
- $name = &$countprepfunc($name);
- $count = 'AA';
- } else {
- $count++;
- }
- }
- @small = @s;
- $used{join('/', @small)} = 1;
- $map{join('/', @bigpart)} = join('/', @small);
- }
- }
- return join('/', @small);
- }
-
- #
- # Convert a single string to something close to it with length up
- # to length given
- sub shorten {
- local($str, $len) = @_;
-
- # Do "fonetic speling" from end to beginning
- while (
- length($str) > $len && (
- $str =~ s/(.*)([fglmnprst])\2(.*)/\1\2\3/i ||
- $str =~ s/(.*)(ou)(.*)/\1u\3/i ||
- $str =~ s/(.*)(ck)(.*)/\1k\3/i ||
- $str =~ s/(.*)(ks)(.*)/\1x\3/i ||
- $str =~ s/(.*)(sh)(.*)/\1s\3/i ||
- $str =~ s/(.*)(ph)(.*)/\1f\3/i ||
- $str =~ s/(.*)(oo)(.*)/\1u\3/i
- )
- ) { ; }
- # Remove lowercase vowels from the end to the beginning
- while (
- length($str) > $len &&
- $str =~ s/(.*)[aeiou](.*)/\1\2/
- ) { ; }
- # Remove uppercase vowels from the end to the beginning
- while (
- length($str) > $len &&
- $str =~ s/(.*)[AEIOU](.*)/\1\2/
- ) { ; }
- # Finally cut characters from the end
- $str = substr($str, 0, $len);
- return $str;
- }
-
- # Create a canonic file name out of one containing .. and .
- # Employ Unix semantics: empty file means root directory.
- sub canonic {
- local(@comp, @can);
-
- @comp = split(/\//, $_[0]);
- for ($i = 0; $i <= $#comp; $i++) {
- if ($comp[$i] eq '.') {
- ;
- } elsif ($comp[$i] eq '') {
- @can = ();
- push(@can, '');
- } elsif ($comp[$i] eq '..') {
- pop(@can);
- } else {
- push(@can, $comp[$i]);
- }
- }
- return join('/', @can);
- }
-
- # A do nothing procedure
- sub copy {
- return $_[0];
- }
-
- # Copy a tape archive from stdin to stdout
- sub copytar {
- binmode STDIN;
- binmode STDOUT;
-
- forloop: for(;;) {
- read(STDIN, $header, 512) == 512 || die "$0: Couldn't read header: $!\n";
- if ($header eq "\0" x 512) {
- last forloop;
- }
- ($name, $mode, $uid, $gid, $size, $mtime, $checksum, $linkflag, $linkname) = unpack("a100 A7x A7x A7x A12 A12 a8 a1 a100", $header);
- #
- # Header modification code should be put here
- # HEADERMOD
- $name =~ s/[\000 ]*//g;
- $name = &$proc($name);
- if ($linkflag != "\0") {
- $linkname =~ s/[\000 ]*//g;
- $linkname = &$proc($linkname);
- }
-
- # Create dummy header for checksum calculation (checksum is blanks)
- $hnew = pack("a99x A6a1x A6a1x A6a1x A11a1 A11a1 a8 a1 a99x x255",
- ($name, $mode, ' ', $uid, ' ', $gid, ' ', $size, ' ', $mtime, ' ', ' ' x 8, $linkflag, $linkname));
- $sz = $size;
- $sz =~ s/ *//g;
- $sz = oct($sz);
- $checksum =~ s/ *//g;
- $checksum = oct($checksum);
- $newcheck = &check($hnew);
- # Create the header with the new checksum
- $hnew = pack("a99x A6a1x A6a1x A6a1x A11a1 A11a1 a6xa1 a1 a99x x255",
- ($name, $mode, ' ', $uid, ' ', $gid, ' ', $size, ' ', $mtime, ' ', sprintf('%6o', $newcheck), ' ', $linkflag, $linkname));
- print STDOUT $hnew;
- # Copy contents
- for ($i = 0; $i < $sz; $i += 512) {
- read(STDIN, $contents, 512) == 512 || die "$0: Couldn't read data: $!\n";
- print STDOUT $contents;
- }
- #seek(STDIN, (int($sz / 512) + 1) * 512, 1) unless $sz == 0;
- }
- # Write EOF
- print STDOUT pack("x512", ());
- print STDOUT pack("x512", ());
- }
-
- # Return checksum for tar header block
- sub check {
- $h = $_[0];
- local($i, $s);
-
- $s = 0;
- for($i = 0; $i < 512; $i++) {
- $s += unpack('C', substr($h, $i, 1));
- }
- return $s;
- }
-
- "
- :end ", 0;
-