home *** CD-ROM | disk | FTP | other *** search
- package I18N::Collate;
-
- =head1 NAME
-
- I18N::Collate - compare 8-bit scalar data according to the current locale
-
- =head1 SYNOPSIS
-
- use I18N::Collate;
- setlocale(LC_COLLATE, 'locale-of-your-choice');
- $s1 = new I18N::Collate "scalar_data_1";
- $s2 = new I18N::Collate "scalar_data_2";
-
- =head1 DESCRIPTION
-
- This module provides you with objects that will collate
- according to your national character set, provided that the
- POSIX setlocale() function is supported on your system.
-
- You can compare $s1 and $s2 above with
-
- $s1 le $s2
-
- to extract the data itself, you'll need a dereference: $$s1
-
- This uses POSIX::setlocale(). The basic collation conversion is done by
- strxfrm() which terminates at NUL characters being a decent C routine.
- collate_xfrm() handles embedded NUL characters gracefully. Due to C<cmp>
- and overload magic, C<lt>, C<le>, C<eq>, C<ge>, and C<gt> work also. The
- available locales depend on your operating system; try whether C<locale
- -a> shows them or man pages for "locale" or "nlsinfo" or
- the direct approach C<ls /usr/lib/nls/loc> or C<ls
- /usr/lib/nls>. Not all the locales that your vendor supports
- are necessarily installed: please consult your operating system's
- documentation and possibly your local system administration.
-
- The locale names are probably something like
- C<"xx_XX.(ISO)?8859-N"> or C<"xx_XX.(ISO)?8859N">, for example
- C<"fr_CH.ISO8859-1"> is the Swiss (CH) variant of French (fr),
- ISO Latin (8859) 1 (-1) which is the Western European character set.
-
- =cut
-
- # I18N::Collate.pm
- #
- # Author: Jarkko Hietaniemi <Jarkko.Hietaniemi@hut.fi>
- # Helsinki University of Technology, Finland
- #
- # Acks: Guy Decoux <decoux@moulon.inra.fr> understood
- # overloading magic much deeper than I and told
- # how to cut the size of this code by more than half.
- # (my first version did overload all of lt gt eq le ge cmp)
- #
- # Purpose: compare 8-bit scalar data according to the current locale
- #
- # Requirements: Perl5 POSIX::setlocale() and POSIX::strxfrm()
- #
- # Exports: setlocale 1)
- # collate_xfrm 2)
- #
- # Overloads: cmp # 3)
- #
- # Usage: use I18N::Collate;
- # setlocale(LC_COLLATE, 'locale-of-your-choice'); # 4)
- # $s1 = new I18N::Collate "scalar_data_1";
- # $s2 = new I18N::Collate "scalar_data_2";
- #
- # now you can compare $s1 and $s2: $s1 le $s2
- # to extract the data itself, you need to deref: $$s1
- #
- # Notes:
- # 1) this uses POSIX::setlocale
- # 2) the basic collation conversion is done by strxfrm() which
- # terminates at NUL characters being a decent C routine.
- # collate_xfrm handles embedded NUL characters gracefully.
- # 3) due to cmp and overload magic, lt le eq ge gt work also
- # 4) the available locales depend on your operating system;
- # try whether "locale -a" shows them or man pages for
- # "locale" or "nlsinfo" work or the more direct
- # approach "ls /usr/lib/nls/loc" or "ls /usr/lib/nls".
- # Not all the locales that your vendor supports
- # are necessarily installed: please consult your
- # operating system's documentation.
- # The locale names are probably something like
- # 'xx_XX.(ISO)?8859-N' or 'xx_XX.(ISO)?8859N',
- # for example 'fr_CH.ISO8859-1' is the Swiss (CH)
- # variant of French (fr), ISO Latin (8859) 1 (-1)
- # which is the Western European character set.
- #
- # Updated: 19960104 1946 GMT
- #
- # ---
-
- use POSIX qw(strxfrm LC_COLLATE);
-
- require Exporter;
-
- @ISA = qw(Exporter);
- @EXPORT = qw(collate_xfrm setlocale LC_COLLATE);
- @EXPORT_OK = qw();
-
- use overload qw(
- fallback 1
- cmp collate_cmp
- );
-
- sub new { my $new = $_[1]; bless \$new }
-
- sub setlocale {
- my ($category, $locale) = @_[0,1];
-
- POSIX::setlocale($category, $locale) if (defined $category);
- # the current $LOCALE
- $LOCALE = $locale || $ENV{'LC_COLLATE'} || $ENV{'LC_ALL'} || '';
- }
-
- sub C {
- my $s = ${$_[0]};
-
- $C->{$LOCALE}->{$s} = collate_xfrm($s)
- unless (defined $C->{$LOCALE}->{$s}); # cache when met
-
- $C->{$LOCALE}->{$s};
- }
-
- sub collate_xfrm {
- my $s = $_[0];
- my $x = '';
-
- for (split(/(\000+)/, $s)) {
- $x .= (/^\000/) ? $_ : strxfrm("$_\000");
- }
-
- $x;
- }
-
- sub collate_cmp {
- &C($_[0]) cmp &C($_[1]);
- }
-
- # init $LOCALE
-
- &I18N::Collate::setlocale();
-
- 1; # keep require happy
-