home *** CD-ROM | disk | FTP | other *** search
- [This document, and STRINGS.TPU, is extracted from SIMTEL20's
- PD2:<MSDOS.TURBOPAS>TPSTRING.ARC.
- STRINGS.TPU is actually the author's STR50.TPU.
- ]
-
- Turbo Pascal Rexx STRINGS Unit
- Version 1.2 for Turbo Pascal 4.0, 5.0 & 5.5
-
- STRINGS.TPU is a Turbo Pascal unit containing 29 string-related
- functions implemented in assembler. These routines are highly
- | optimized (check out the benchmark program) and make extensive use
- of the 80x86 string oriented opcodes. IBM mainframe hackers will
- | notice that this package is substantially equivalent to the string
- routines available in IBM's Rexx language.
-
- | Three versions are included: STR40.TPU, STR50.TPU and STR55.TPU
- | for TPas versions 4.0, 5.0 and 5.5, respectively. Be sure to
- | rename the appropriate file to STRINGS.TPU before usage.
-
- Function Descriptions
- ---------------------
-
- function left(str:string; width:byte; pad:char):string;
- Returns STR left justified in a field of width WIDTH, padded out
- with PAD characters.
- Ex: left('hello',10,'=') returns 'hello====='
- left('hello there',10,'=') returns 'hello ther'
-
- function right(str:string; width:byte; pad:char):string;
- Returns STR right justified in a field of width WIDTH, padded out
- with PAD characters.
- Ex: right('hello',10,'>') returns '>>>>>hello'
- right('hello there',10,'>') returns 'ello there'
-
- function center(str:string; width:byte; pad:char):string;
- Returns STR centered in a string of length WIDTH, padded out
- with PAD chars.
- Ex: center('ABC',8,' ') returns '  ABC   '
- center(' ABC ',8,'-') returns '- ABC --'
- center('ABCDE',3,'-') returns 'ABC'
-
- function strip(str:string; opt,ch:char):string;
- Strips leading and/or trailing CH characters from STR.
- Setting OPT to L, T or B causes leading, trailing, or both
- leading and trailing characters to be stripped.
- Ex: strip(' abcdef ','L',' ') returns 'abcdef '
- strip(' abcdef ','t',' ') returns ' abcdef'
- strip(' abcdef ','b',' ') returns 'abcdef'
- strip('++ abcdef +','B','+') returns ' abcdef '
-
- function lastpos(findstr,instr:string; start:byte):byte;
- Returns the position of the last occurrence of FINDSTR in INSTR,
- searching backwards from the character position START. If START
- is 0, the search begins at the end of INSTR. Returns 0 if the
- string is not found.
- Ex: lastpos('he','he was the best',15) returns 9.
- lastpos('he','he was the best',6) returns 1.
- lastpos('he','he was the best',0) returns 9.
- lastpos('he','he was the best',1) returns 0.
-
- function firstpos(findstr,instr:string; start:byte):byte;
- This function was included for completeness. It works exactly
- the same way as Turbo's built in POS function, except for the
- presence of the START option. It is equivalent to:
- start-1+pos(findstr,copy(instr,start,length(instr)-start+1));
- except for being more efficient.
- Ex: firstpos('he','he was the best',15) returns 0.
- firstpos('he','he was the best',6) returns 9.
- firstpos('he','he was the best',0) returns 1.
- firstpos('he','he was the best',1) returns 1.
- | The timing benchmark in BENCHMRK.PAS indicates this runs
- | over NINE times faster than Tpas's POS function, on my NEC
- | V20 CPU (a Japanese clone of an 8088).
- | (this will vary with the specific strings involved)
-
- function copies(str:string; count:byte):string;
- Returns COUNT copies of STR concatenated together.
- If the length of n(<=count) copies of STR would exceed 255,
- n-1 copies are returned.
- Ex: copies('----+',4) returns '----+----+----+----+'
-
- function overlay(new,str:string; pos:byte; pad:char):string;
- Returns the string STR, overlaid by the string NEW, starting
- at character position POS, padding out STR with PAD characters
- if necessary.
- Ex: overlay('aygu','Ronald Reagan',9,'+') returns 'Ronald Raygun'
- overlay('abc','xyz',6,'+') returns 'xyz++abc'
-
- |function instr(new,str:string; pos:byte; pad:char):string;
- | Returns the string STR after insertion of the string NEW,
- | starting at character position POS, padding out STR with PAD
- | characters if necessary.
- | Ex: instr('abcdef','123456',4,'+') returns '123abcdef456'
- | instr('abcdef','123456',10,'+') returns '123456+++abcdef'
- | While having similar function to TPas's INSERT procedure, the
- | timing benchmark in BENCHMRK.PAS indicates instr is about 60%
- | faster.
-
- |function delstr(str:string; pos,len:byte):string;
- | Returns the string STR after deletion of LEN characters,
- | starting at character position POS.
- | Ex: delstr('abcdefgh',4,2) returns 'abcfgh'
- | delstr('abcdefgh',4,20) returns 'abc'
- | While having similar function to TPas's DELETE procedure, the
- | timing benchmark in BENCHMRK.PAS indicates delstr is about 50%
- | faster.
-
- |function substr(str:string; pos,len:byte):string;
- | Returns a substring from STR, starting at position POS
- | and continuing for LEN characters, or until the end of STR.
- | Ex: substr('1234567890',4,2) returns '45'
- | substr('1234567890',4,20) returns '4567890'
- | This function is identical to TPas's COPY function, and it
- | executes at the same speed. So why did I include it? Who
- | knows? I guess I like typing substr better than typing copy...
-
- function uppercase(str:string):string;
- Folds the argument STR to uppercase.
- Ex: uppercase('abcdef123') returns 'ABCDEF123'
-
- function lowercase(str:string):string;
- Folds the argument STR to lowercase.
- Ex: lowercase('ABCDEF123') returns 'abcdef123'
-
- function words(str:string):byte;
- Returns the number of (blank delimited) words in the string STR.
- Ex: words('two four six eight') returns 4.
-
- function werd(str:string; n:byte):string;
- Returns the N'th (blank delimited) word from the string STR.
- The strange spelling is to avoid conflict with Tpas's WORD type.
- Ex: werd('two air is humin',3) returns 'is'
- werd('two air is humin',5) returns ''
-
- function subword(str:string; n,count:byte):string;
- Returns COUNT words from STR, starting at the N'th word.
- Embedded blanks are preserved.
- Ex: subword('one two three four',2,3) returns 'two three four'
- subword('one two three four',2,0) returns ''
- subword('one two three ',2,3) returns 'two three'
- subword('one two three ',4,2) returns ''
-
- function delword(str:string; n,count:byte):string;
- Returns STR with COUNT words deleted, starting at word N.
- Preceding blanks are preserved.
- Ex: delword('here we go again',2,2) returns 'here again'
-
- function pos2word(str:string; pos:byte):byte;
- Returns the number of the word in STR pointed to by POS. If
- POS points to a blank, the number of the following word is
- returned. If POS points after the last word or end of STR,
- or POS is 0, then 0 is returned.
- Ex: pos2word('abc def ghi ',4) returns 2.
- pos2word('abc def ghi ',6) returns 2.
- pos2word('abc def ghi ',11) returns 3.
- pos2word('abc def ghi ',12) returns 0.
- pos2word('abc def ghi ',0) returns 0.
-
- function word2pos(str:string; wrd:byte):byte;
- Returns the position in STR of the first character in the WRD'th
- word. If WRD>WORDS(STR) or WRD=0, returns 0.
- Ex: word2pos('  abcd e fghi jk',2) returns 8.
- Note that this function is equivalent to Rexx's WORDINDEX
- function.
-
- function space(str:string; spc:byte):string;
- Returns STR with each word separated by SPC blanks.
- Ex: space(' here we go again ',0) returns 'herewegoagain'
- space(' here we go again ',1) returns 'here we go again'
- | If the length of the output string would exceed 255, the string
- | is truncated at the end of the last whole word which fits.
-
- function justify(str:string; len:byte):string;
- Distributes blanks between words in STR so that length(STR)=LEN.
- Ex: justify(' a b cd ef ',10)='a b cd ef'
- | The length of STR should be <= LEN.
- | This runs about eight times faster than the previous version,
- | which was written in pascal.
- | See usage notes below for important information on usage.
-
- function translate(str,intable,outable:string):string;
- Returns STR after translation via the map INTABLE->OUTABLE.
- In other words, each occurrence in STR of the i'th character
- in INTABLE is replaced by the i'th character in OUTABLE.
- Ex: translate('ABC BDE',' BCF','XYZ ') returns 'AYZXYDE'
- INTABLE and OUTABLE should be of the same length.
-
- function verify(str,ref:string; opt:char; start:byte):byte;
- Returns the position of the first character in STR (after START)
- which matches/doesn't match a character in REF. Setting OPT to
- 'M' or 'N' returns matching or non-matching character positions,
- respectively.
- Ex: verify('abcd1ef','0123456789','M',0) returns 5.
- verify('123a125','0123456789','n',0) returns 4.
-
- function compare(s1,s2:string):byte;
- Compares S1 to S2 and returns the position of the first
- characters which don't match, or 0 if all characters match.
- Ex: compare('hello','hello there') returns 6.
- compare('hello','hexlo') returns 3.
- compare('hello','hello') returns 0.
- compare('','') returns 0.
-
- function xrange(c1,c2:char):string;
- Returns a string containing all characters from C1 to C2
- | inclusive. Note the ordering of C1 & C2 can now be reversed.
- Ex: xrange('a','h') returns 'abcdefgh'
- | xrange('h','a') returns 'hgfedcba'
-
- function reverse(str:string):string;
- Returns contents of STR in reverse order.
- Ex: reverse('hello there') returns 'ereht olleh'
-
- function abbrev(str,abbr:string; len:byte):boolean;
- Returns true if ABBR is an 'acceptable' abbreviation for STR.
- The criterion is:
- length(ABBR)>=LEN and ABBR=left(STR,length(ABBR),' ')
- LEN should be set <= length(STR).
- Ex: abbrev('DELETE','DEL',3)=true
- abbrev('DELETE','DELY',3)=false
- abbrev('DELETE','DELET',3)=true
- abbrev('DELETE','DELETEX',3)=false
-
- function d2x(i:word):xstr;
- (XSTR is defined in the TPU as STRING[4])
- Returns a four byte string equal to the hex representation of I.
- Ex: d2x(255) returns '00FF'
-
- function x2d(x:xstr):word;
- Returns the numeric value represented by the xstr X. Upper
- and lower case A-F are valid on input. No checking is done for
- the validity of the characters in X, so garbage input gives
- garbage output. If the validity of X is in doubt, use the
- VERIFY function first:
- validx:=(verify(x,'0123456789ABCDEFabcdef','N',0)=0);
- Ex: x2d('7F') returns 127.
-
- Usage Notes
- -----------
-
- Note that Rexx's FIND and WORDLENGTH functions can be readily
- synthesized using functions in this package:
-
- {find returns the number of the word in str1 where str2 starts}
- find(str1,str2) ::= pos2word(str1,firstpos(str2,str1,1))
-
- {wordlength returns the length of a word in str}
- wordlength(str,n) ::= length(werd(str,n))
-
- The previous version of justify did an implicit space(str,1) upon
- entry. However, it turns out that any reasonable text formatting
- algorithm will require that a space() be done BEFORE the call to
- justify (see TXTFMT.PAS for an example) so the implicit call is
- actually redundant, so I got rid of it. This also allows the
- added flexibility of inserting an extra space after periods (at
- the end of sentences) before calling justify, if you're picky
- about such things. Justify DOES do an implicit strip(str,'B',' ')
- upon entry.
-
- Why the lack of VAR string parameters? (in case you were wondering)
-
- I initially wrote this unit to use VAR string formal parameters in
- the interest of speed, but I've since discovered that when calling
- external assembler routines with value string formal parameters,
- when the actual parameter is a string variable (as opposed to a
- string expression), TP passes a pointer to the ACTUAL variable,
- not a copy of the variable. While this behavior isn't consistent
- with the pascal standard, assembler isn't pascal, and it does give
- the programmer the best of both worlds: fast execution and low
- stack overhead when using string variables as parameters, and the
- flexibility of value parameters. At any rate, string variable
- function parameters are NEVER modified.
-
- Differences between releases:
-
- Release 1.05:
-
- Converted ABBREV and DELWORD to asm. Included turbo compiler
- directives {$N-,E-,D-,L-,B-,I-,R-,S+,V-} for optimization. Added
- PAD parameter to OVERLAY. Added CHGSTR. Fixed bug in FIRSTPOS.
- Changed behavior of SUBWORD when COUNT=0, to match Rexx's SUBWORD
- function.
-
- Release 1.1:
-
- Compiled for Tpas 5.0. Modified to cooperate with Tpas's dead
- code elimination feature. Added X2D & D2X. Changed JUSTIFY to
- use integer arithmetic for added speed. Changed name of function
- WORD to WERD to avoid conflict with Tpas's new WORD type. Added
- type XSTR for use with X2D & D2X.
-
- Release 1.11:
-
- Tweaked LEFT, RIGHT, SPACE and COMPARE for optimization.
-
- |Release 1.2:
- |
- | Added SUBSTR, INSTR & DELSTR. Converted JUSTIFY to asm. Got rid
- | of CHGSTR (it didn't seem as useful as I'd first thought, it's not
- | part of standard Rexx, and it was out of place as a pascal routine
- | among asm routines). Changed truncation behavior in SPACE when
- | output string exceeds 255 chars. Changed XRANGE to allow reverse
- | ordering of C1 and C2, to produce reversed output. Included
- | benchmark and test suite programs. Included TPU versions for TP
- | 4.0, 5.0 and 5.5.
-
- Examples:
-
- Capitalize the first char in each word in a string S:
- for i:=1 to words(S) do begin
- j:=word2pos(S,i);
- S:=overlay(upcase(S[j]),S,j,' ');
- end;
-
- Find the lowest-ordered alphabetic character in a string STR
- of uppercase characters:
- lochar:=chr(ord('A')+verify(xrange('A','Z'),STR,'M',0)-1);
-
- Find the highest-ordered alphabetic character in a string STR
- of uppercase characters:
- hichar:=
- chr(ord('Z')-verify(xrange('Z','A'),STR,'M',0)+1);
-
- Replace non-alphabetic chars with blanks in a string S:
- S:=translate(S,xrange('!','9')+xrange(':','@')+
- xrange('[','`')+xrange('{',#127),left('',43,' '));
- Or:
- S:=translate(S,translate(xrange('!',#127),xrange('A','Z')+xrange('a','z'),
- left('',52,' ')),left('',ord(#127)-ord('!')+1,' '));
-
- Generate a sorted string S consisting of chars between '0' and 'z',
- none of which occur in an alphanumeric string STR:
- S:=space(translate(xrange('0','z'),STR,left('',length(STR),' ')),0);
-
- Generate a sorted string S, each of whose characters occurs at least
- once in an alphanumeric string STR:
- S:=space(translate(xrange('0','z'),translate(xrange('0','z'),
- STR,left('',length(STR),' ')),left('',75,' ')),0);
-
- Permute the characters in a 'MM/DD/YY' date string to allow for
- easy date comparison:
- translate('78312645','12345678','12/25/88') returns '88/12/25'.
-
- A simple text formatting example is in TXTFMT.PAS.
-
-
- The Fine Print
- --------------
-
- STRINGS.TPU is copyright 1989 by Richard Winkel. The ASM & OBJ
- files are available from me for $10 if you send me a formatted
- 360K disk in a stamped self addressed mailer. If you want to
- avoid the hassle, add $3 and I'll buy the floppy, mailer and
- postage.
-
- Note: Purchase of the ASM & OBJ files confers an unlimited license to
- incorporate these routines into your own programs for any purpose,
- commercial or otherwise.
-
- Send cash, check or money order to:
-
- Richard Winkel
- Route 1, box 193
- Harrisburg, MO. 65256
-
- Note: You don't need an assembler to recompile the TPU, as long as
- you have the OBJ files and a Turbo Pascal compiler.
-
- Internet address:
- MATHRICH@UMCVMB.BITNET,
- MATHRICH@UMCVMB.MISSOURI.EDU or
- MATHRICH%UMCVMB@CUNYVM.CUNY.EDU
-
- ------------------------------- cut here ---------------------------
-
- Calling Syntax Guide to STRINGS.TPU
-
- type xstr=string[4];
-
- function abbrev(str,abbr:string; len:byte):boolean;
- function center(s:string; width:byte; pad:char):string;
- function compare(s1,s2:string):byte;
- function copies(str:string; count:byte):string;
- function d2x(i:word):xstr;
- function delstr(str:string; pos,len:byte):string;
- function delword(str:string; n,len:byte):string;
- function firstpos(findstr,instr:string; start:byte):byte;
- function instr(new,str:string; pos:byte; pad:char):string;
- function justify(str:string; len:byte):string;
- function lastpos(findstr,instr:string; start:byte):byte;
- function left(str:string; width:byte; pad:char):string;
- function lowercase(s:string):string;
- function overlay(new,str:string; pos:byte; pad:char):string;
- function pos2word(s:string; pos:byte):byte;
- function reverse(str:string):string;
- function right(str:string; width:byte; pad:char):string;
- function space(str:string; spc:byte):string;
- function strip(s:string; opt,ch:char):string; {opt='L', 'T' or 'B'}
- function substr(str:string; pos,len:byte):string;
- function subword(str:string; num,count:byte):string;
- function translate(str,intable,outable:string):string;
- function uppercase(s:string):string;
- function verify(str,ref:string; opt:char; start:byte):byte; {opt='M' or 'N'}
- function werd(s:string;c:byte):string;
- function word2pos(s:string; wrd:byte):byte;
- function words(s:string):byte;
- function x2d(str:xstr):word;
- function xrange(c1,c2:char):string;