home *** CD-ROM | disk | FTP | other *** search
- --::::::::::
- --character_set.pro
- --::::::::::
- -------- SIMTEL20 Ada Software Repository Prologue ------------
- -- -*
- -- Unit name : CHARACTER_SET
- -- Version : 2.0
- -- Author : Joseph M. Orost
- -- : Concurrent Computer Corporation
- -- : 106 Apple Street
- -- : Tinton Falls, NJ 07724
- -- DDN Address : vax135!petsd!joe@BERKELEY.ARPA
- -- Date created : 15 Feb 85
- -- Release date : 15 Feb 85
- -- Last update : 12 Mar 87
- -- Machine/System Compiled/Run on : CCUR 3200MPS, C3ADA R00-01/BETA
- -- -*
- ---------------------------------------------------------------
- -- -*
- -- Keywords :
- ----------------: character, character set
- --
- -- Abstract : CHARACTER_SET provides a number of test routines
- ----------------: which determine if a given character falls into
- -- a particular class of characters. See the visible section for
- -- details. It also provides routines for character and string
- -- letter case conversion (to lower case, to upper case) and for
- -- naming control characters.
- -- -*
- ------------------ Revision history ---------------------------
- -- -*
- -- DATE VERSION AUTHOR HISTORY
- -- 2/15/85 1.0 Richard Conn Initial Release
- -- 2/25/85 1.1 Richard Conn Cosmetic, Readability Changes
- -- 3/12/87 2.0 Joseph M. Orost Rewrite for better performance.
- -- Replaced functions with arrays.
- -- Source compatible unless
- -- named notation was used for
- -- function calls:
- -- IS_LOWER(CH => C) no longer works,
- -- but IS_LOWER(C) is OK.
- -- -*
- ------------------ Distribution and Copyright -----------------
- -- -*
- -- This software is released to the Ada community.
- -- This software is released to the Public Domain (note:
- -- software released to the Public Domain is not subject
- -- to copyright protection).
- -- Restrictions on use or distribution: NONE
- -- -*
- ------------------ Disclaimer ---------------------------------
- -- -*
- -- This software and its documentation are provided "AS IS" and
- -- without any expressed or implied warranties whatsoever.
- -- No warranties as to performance, merchantability, or fitness
- -- for a particular purpose exist.
- --
- -- Because of the diversity of conditions and hardware under
- -- which this software may be used, no warranty of fitness for
- -- a particular purpose is offered. The user is advised to
- -- test the software thoroughly before relying on it. The user
- -- must assume the entire risk and liability of using this
- -- software.
- --
- -- In no event shall any person or organization of people be
- -- held responsible for any direct, indirect, consequential
- -- or inconsequential damages or lost profits.
- -- -*
- -------------------END-PROLOGUE--------------------------------
- --::::::::::
- --character_set.ada
- --::::::::::
- --
- -- Components Package CHARACTER_SET
- -- by Joseph M. Orost, Concurrent Computer Corporation
- -- Version 2.0, Date 12 Mar 87
- -- by Richard Conn, TI Ada Technology Branch
- -- Version 1.1, Date 25 Feb 85
- -- Version 1.0, Date 13 Feb 85
- --
-
- package CHARACTER_SET is
-
- --
- -- These routines test for the following subsets of ASCII
- --
- -- Routine Subset tested for
- -- ======= =================
- -- ALPHA 'a'..'z' | 'A'..'Z'
- -- ALPHA_NUMERIC ALPHA | '0'..'9'
- -- CONTROL < ' ' | DEL
- -- DIGIT '0'..'9'
- -- GRAPHIC ' ' < ch < DEL (does not include space)
- -- HEXADECIMAL DIGIT | 'A'..'F' | 'a'..'f'
- -- LOWER 'a'..'z'
- -- PRINTABLE GRAPHIC | ' '
- -- PUNCTUATION GRAPHIC and not ALPHA_NUMERIC
- -- SPACE HT | LF | VT | FF | CR | ' '
- -- UPPER 'A'..'Z'
- --
-
- use ASCII;
-
- type BIT_ARRAY is array (CHARACTER) of BOOLEAN;
- pragma PACK (BIT_ARRAY);
- pragma BIT_PACK (BIT_ARRAY); --Compiler dependent
-
- IS_ALPHA : constant BIT_ARRAY :=
- BIT_ARRAY'('a' .. 'z' => TRUE, 'A' .. 'Z' => TRUE, others => FALSE);
-
- IS_ALPHA_NUMERIC : constant BIT_ARRAY :=
- BIT_ARRAY'
- ('a' .. 'z' => TRUE, 'A' .. 'Z' => TRUE, '0' .. '9' => TRUE,
- others => FALSE );
-
- IS_CONTROL : constant BIT_ARRAY :=
- BIT_ARRAY'(NUL .. US => TRUE, DEL => TRUE, others => FALSE);
-
- IS_DIGIT : constant BIT_ARRAY :=
- BIT_ARRAY'('0' .. '9' => TRUE, others => FALSE);
-
- IS_GRAPHIC : constant BIT_ARRAY :=
- BIT_ARRAY'('!' .. '~' => TRUE, others => FALSE);
-
- IS_HEXADECIMAL : constant BIT_ARRAY :=
- BIT_ARRAY'
- ('0' .. '9' => TRUE, 'A' .. 'F' => TRUE, 'a' .. 'f' => TRUE,
- others => FALSE );
-
- IS_LOWER : constant BIT_ARRAY :=
- BIT_ARRAY'('a' .. 'z' => TRUE, others => FALSE);
-
- IS_PRINTABLE : constant BIT_ARRAY :=
- BIT_ARRAY'(' ' .. '~' => TRUE, others => FALSE);
-
- IS_PUNCTUATION : constant BIT_ARRAY :=
- BIT_ARRAY'
- ('!' .. '/' => TRUE, ':' .. '@' => TRUE, '[' .. '`' => TRUE,
- '{' .. '~' => TRUE, others => FALSE );
-
- IS_SPACE : constant BIT_ARRAY :=
- BIT_ARRAY'
- (HT => TRUE , LF => TRUE , VT => TRUE , FF => TRUE ,
- CR => TRUE , ' ' => TRUE , others => FALSE);
-
- IS_UPPER : constant BIT_ARRAY :=
- BIT_ARRAY'('A' .. 'Z' => TRUE, others => FALSE);
-
- --
- -- These routines convert characters and strings to upper- or lower-case
- --
-
- type TRANSLATION_ARRAY is array (CHARACTER) of CHARACTER;
- pragma PACK (TRANSLATION_ARRAY);
-
- --
- -- LOWER can be used in place of TO_LOWER. (Ada won't allow overloading
- -- of an object vs. a procedure.)
- --
- LOWER : constant TRANSLATION_ARRAY :=
- (NUL, SOH, STX, ETX, EOT, ENQ, ACK, BEL, BS , HT , LF , VT , FF , CR ,
- SO , SI , DLE, DC1, DC2, DC3, DC4, NAK, SYN, ETB, CAN, EM , SUB, ESC,
- FS , GS , RS , US , ' ', '!', '"', '#', '$', '%', '&', ''', '(', ')',
- '*', '+', ',', '-', '.', '/', '0', '1', '2', '3', '4', '5', '6', '7',
- '8', '9', ':', ';', '<', '=', '>', '?', '@', 'a', 'b', 'c', 'd', 'e',
- 'f', 'g', 'h', 'i', 'j', 'k', 'l', 'm', 'n', 'o', 'p', 'q', 'r', 's',
- 't', 'u', 'v', 'w', 'x', 'y', 'z', '[', '\', ']', '^', '_', '`', 'a',
- 'b', 'c', 'd', 'e', 'f', 'g', 'h', 'i', 'j', 'k', 'l', 'm', 'n', 'o',
- 'p', 'q', 'r', 's', 't', 'u', 'v', 'w', 'x', 'y', 'z', '{', '|', '}',
- '~', DEL);
-
- function TO_LOWER (CH : in CHARACTER) return CHARACTER;
-
- procedure TO_LOWER (CH : in out CHARACTER);
-
- procedure TO_LOWER (STR : in out STRING);
-
- --
- -- UPPER can be used in place of TO_UPPER
- --
- UPPER : constant TRANSLATION_ARRAY :=
- (NUL, SOH, STX, ETX, EOT, ENQ, ACK, BEL, BS , HT , LF , VT , FF , CR ,
- SO , SI , DLE, DC1, DC2, DC3, DC4, NAK, SYN, ETB, CAN, EM , SUB, ESC,
- FS , GS , RS , US , ' ', '!', '"', '#', '$', '%', '&', ''', '(', ')',
- '*', '+', ',', '-', '.', '/', '0', '1', '2', '3', '4', '5', '6', '7',
- '8', '9', ':', ';', '<', '=', '>', '?', '@', 'A', 'B', 'C', 'D', 'E',
- 'F', 'G', 'H', 'I', 'J', 'K', 'L', 'M', 'N', 'O', 'P', 'Q', 'R', 'S',
- 'T', 'U', 'V', 'W', 'X', 'Y', 'Z', '[', '\', ']', '^', '_', '`', 'A',
- 'B', 'C', 'D', 'E', 'F', 'G', 'H', 'I', 'J', 'K', 'L', 'M', 'N', 'O',
- 'P', 'Q', 'R', 'S', 'T', 'U', 'V', 'W', 'X', 'Y', 'Z', '{', '|', '}',
- '~', DEL);
-
- function TO_UPPER (CH : in CHARACTER) return CHARACTER;
-
- procedure TO_UPPER (CH : in out CHARACTER);
-
- procedure TO_UPPER (STR : in out STRING);
-
- --
- -- These routines return the names of the control characters
- --
-
- subtype CONTROL_CHARACTER_NAME_2 is STRING (1 .. 2);
-
- subtype CONTROL_CHARACTER_NAME_3 is STRING (1 .. 3);
-
- --
-
- function CC_NAME_2 (CH : CHARACTER) return CONTROL_CHARACTER_NAME_2;
-
- function CC_NAME_3 (CH : CHARACTER) return CONTROL_CHARACTER_NAME_3;
-
- end CHARACTER_SET;
-
- package body CHARACTER_SET is
-
- function TO_LOWER (CH : in CHARACTER) return CHARACTER is
- begin
- return LOWER (CH);
- end TO_LOWER;
-
- procedure TO_LOWER (CH : in out CHARACTER) is
- begin
- CH := LOWER (CH);
- end TO_LOWER;
-
- procedure TO_LOWER (STR : in out STRING) is
- begin
- for I in STR'FIRST .. STR'LAST loop
- STR (I) := LOWER (STR (I));
- end loop;
- end TO_LOWER;
-
- function TO_UPPER (CH : in CHARACTER) return CHARACTER is
- begin
- return UPPER (CH);
- end TO_UPPER;
-
- procedure TO_UPPER (CH : in out CHARACTER) is
- begin
- CH := UPPER (CH);
- end TO_UPPER;
-
- procedure TO_UPPER (STR : in out STRING) is
- begin
- for I in STR'FIRST .. STR'LAST loop
- STR (I) := UPPER (STR (I));
- end loop;
- end TO_UPPER;
-
- function CC_NAME_2 (CH : CHARACTER) return CONTROL_CHARACTER_NAME_2 is
- NAME : CONTROL_CHARACTER_NAME_2;
- begin
- case CH is
- when ASCII.NUL => NAME := "^@";
- when ASCII.SOH => NAME := "^A";
- when ASCII.STX => NAME := "^B";
- when ASCII.ETX => NAME := "^C";
- when ASCII.EOT => NAME := "^D";
- when ASCII.ENQ => NAME := "^E";
- when ASCII.ACK => NAME := "^F";
- when ASCII.BEL => NAME := "^G";
- when ASCII.BS => NAME := "^H";
- when ASCII.HT => NAME := "^I";
- when ASCII.LF => NAME := "^J";
- when ASCII.VT => NAME := "^K";
- when ASCII.FF => NAME := "^L";
- when ASCII.CR => NAME := "^M";
- when ASCII.SO => NAME := "^N";
- when ASCII.SI => NAME := "^O";
- when ASCII.DLE => NAME := "^P";
- when ASCII.DC1 => NAME := "^Q";
- when ASCII.DC2 => NAME := "^R";
- when ASCII.DC3 => NAME := "^S";
- when ASCII.DC4 => NAME := "^T";
- when ASCII.NAK => NAME := "^U";
- when ASCII.SYN => NAME := "^V";
- when ASCII.ETB => NAME := "^W";
- when ASCII.CAN => NAME := "^X";
- when ASCII.EM => NAME := "^Y";
- when ASCII.SUB => NAME := "^Z";
- when ASCII.ESC => NAME := "^[";
- when ASCII.FS => NAME := "^\";
- when ASCII.GS => NAME := "^]";
- when ASCII.RS => NAME := "^^";
- when ASCII.US => NAME := "^_";
- when ASCII.DEL => NAME := "^`";
- when others =>
- NAME := " ";
- NAME (2) := CH;
- end case;
- return NAME;
- end CC_NAME_2;
-
- function CC_NAME_3 (CH : CHARACTER) return CONTROL_CHARACTER_NAME_3 is
- NAME : CONTROL_CHARACTER_NAME_3;
- begin
- case CH is
- when ASCII.NUL => NAME := "NUL";
- when ASCII.SOH => NAME := "SOH";
- when ASCII.STX => NAME := "STX";
- when ASCII.ETX => NAME := "ETX";
- when ASCII.EOT => NAME := "EOT";
- when ASCII.ENQ => NAME := "ENQ";
- when ASCII.ACK => NAME := "ACK";
- when ASCII.BEL => NAME := "BEL";
- when ASCII.BS => NAME := "BS ";
- when ASCII.HT => NAME := "HT ";
- when ASCII.LF => NAME := "LF ";
- when ASCII.VT => NAME := "VT ";
- when ASCII.FF => NAME := "FF ";
- when ASCII.CR => NAME := "CR ";
- when ASCII.SO => NAME := "SO ";
- when ASCII.SI => NAME := "SI ";
- when ASCII.DLE => NAME := "DLE";
- when ASCII.DC1 => NAME := "DC1";
- when ASCII.DC2 => NAME := "DC2";
- when ASCII.DC3 => NAME := "DC3";
- when ASCII.DC4 => NAME := "DC4";
- when ASCII.NAK => NAME := "NAK";
- when ASCII.SYN => NAME := "SYN";
- when ASCII.ETB => NAME := "ETB";
- when ASCII.CAN => NAME := "CAN";
- when ASCII.EM => NAME := "EM ";
- when ASCII.SUB => NAME := "SUB";
- when ASCII.ESC => NAME := "ESC";
- when ASCII.FS => NAME := "FS ";
- when ASCII.GS => NAME := "GS ";
- when ASCII.RS => NAME := "RS ";
- when ASCII.US => NAME := "US ";
- when ASCII.DEL => NAME := "DEL";
- when others =>
- NAME := " ";
- NAME (2) := CH;
- end case;
- return NAME;
- end CC_NAME_3;
- end CHARACTER_SET;
-
- --::::::::::
- --tcset.ada
- --::::::::::
- --
- -- Test for routines in CHARACTER_SET
- -- by Richard Conn, TI Ada Technology Branch
- --
-
- with CHARACTER_SET;
- use CHARACTER_SET;
- with TEXT_IO;
- use TEXT_IO;
-
- procedure TCSET is
- LST_FILE : FILE_TYPE;
- INCHAR : CHARACTER;
- INDEX : NATURAL;
-
- procedure MARK (BOOLEAN_VALUE : BOOLEAN) is
- begin
- if BOOLEAN_VALUE then
- PUT (LST_FILE, " x ");
- else
- PUT (LST_FILE, " ");
- end if;
- end MARK;
-
- procedure BANNER is
- begin
- PUT (LST_FILE, ASCII.FF);
- NEW_LINE (LST_FILE);
- PUT (LST_FILE, " ");
- PUT (LST_FILE, " A ");
- PUT (LST_FILE, " A ");
- PUT (LST_FILE, " C ");
- PUT (LST_FILE, " D ");
- PUT (LST_FILE, " G ");
- PUT (LST_FILE, " H ");
- PUT (LST_FILE, " L ");
- PUT (LST_FILE, " P ");
- PUT (LST_FILE, " P ");
- PUT (LST_FILE, " S ");
- PUT (LST_FILE, " U ");
- NEW_LINE (LST_FILE);
- PUT (LST_FILE, " ");
- PUT (LST_FILE, " l ");
- PUT (LST_FILE, " l ");
- PUT (LST_FILE, " n ");
- PUT (LST_FILE, " i ");
- PUT (LST_FILE, " r ");
- PUT (LST_FILE, " e ");
- PUT (LST_FILE, " o ");
- PUT (LST_FILE, " r ");
- PUT (LST_FILE, " u ");
- PUT (LST_FILE, " p ");
- PUT (LST_FILE, " p ");
- NEW_LINE (LST_FILE);
- PUT (LST_FILE, "ASCII ");
- PUT (LST_FILE, " p ");
- PUT (LST_FILE, " n ");
- PUT (LST_FILE, " t ");
- PUT (LST_FILE, " g ");
- PUT (LST_FILE, " a ");
- PUT (LST_FILE, " x ");
- PUT (LST_FILE, " w ");
- PUT (LST_FILE, " i ");
- PUT (LST_FILE, " n ");
- PUT (LST_FILE, " a ");
- PUT (LST_FILE, " p ");
- NEW_LINE (LST_FILE);
- PUT (LST_FILE, " Code ");
- PUT (LST_FILE, " h ");
- PUT (LST_FILE, " u ");
- PUT (LST_FILE, " r ");
- PUT (LST_FILE, " i ");
- PUT (LST_FILE, " p ");
- PUT (LST_FILE, " n ");
- PUT (LST_FILE, " e ");
- PUT (LST_FILE, " n ");
- PUT (LST_FILE, " c ");
- PUT (LST_FILE, " c ");
- PUT (LST_FILE, " e ");
- NEW_LINE (LST_FILE);
- PUT (LST_FILE, " ");
- PUT (LST_FILE, " a ");
- PUT (LST_FILE, " m ");
- PUT (LST_FILE, " l ");
- PUT (LST_FILE, " t ");
- PUT (LST_FILE, " h ");
- PUT (LST_FILE, " m ");
- PUT (LST_FILE, " r ");
- PUT (LST_FILE, " t ");
- PUT (LST_FILE, " t ");
- PUT (LST_FILE, " e ");
- PUT (LST_FILE, " r ");
- NEW_LINE (LST_FILE);
- NEW_LINE (LST_FILE);
- end BANNER;
- begin
- CREATE (LST_FILE, OUT_FILE, "TCSET.OUT");
- INDEX := 0;
- for CH in CHARACTER'FIRST .. CHARACTER'LAST loop
- if INDEX mod 40 = 0 then
- BANNER;
- else
- if INDEX mod 5 = 0 then
- NEW_LINE (LST_FILE);
- end if;
- end if;
- INDEX := INDEX + 1;
- PUT (LST_FILE, ' ');
- PUT (LST_FILE, CC_NAME_3 (CH));
- PUT (LST_FILE, " ");
- PUT (LST_FILE, CC_NAME_2 (CH));
- PUT (LST_FILE, " | ");
- MARK (IS_ALPHA (CH));
- MARK (IS_ALPHA_NUMERIC (CH));
- MARK (IS_CONTROL (CH));
- MARK (IS_DIGIT (CH));
- MARK (IS_GRAPHIC (CH));
- MARK (IS_HEXADECIMAL (CH));
- MARK (IS_LOWER (CH));
- MARK (IS_PRINTABLE (CH));
- MARK (IS_PUNCTUATION (CH));
- MARK (IS_SPACE (CH));
- MARK (IS_UPPER (CH));
- NEW_LINE (LST_FILE);
- end loop;
- CLOSE (LST_FILE);
- end TCSET;
-
-