home *** CD-ROM | disk | FTP | other *** search
- /*---------------------------------------------------------------------------
- *
- * Copyright (c) 1993 by Westmount Technology B.V., Delft, The Netherlands.
- *
- * This software is furnished under a license and may be used only in
- * accordance with the terms of such license and with the inclusion of
- * the above copyright notice. This software or any other copies thereof
- * may not be provided or otherwise made available to any other person.
- * No title to and ownership of the software is hereby transferred.
- *
- * The information in this software is subject to change without notice
- * and should not be construed as a commitment by Westmount Technology B.V.
- *
- *---------------------------------------------------------------------------
- *
- * File : @(#)String.cxx /main/hindenburg/1 (1.7) (1.5)
- * Author : NIH, erel
- * Original date :
- * Description : Class String implements reference counted character
- * String objects. Operations provided include + (concatenation)
- * and () (SubString extraction). Type conversions between
- * String and char* are provided, permitting the two to be used
- * interchangeably in many contexts.
- * History : March 1993: added reference counting
- *
- * Originally derived from String in the NIH-classes
- * Authors:
- * C. J. Eppich and K. E. Gorlen
- * Bg. 12A, Rm. 2033
- * Computer Systems Laboratory
- * Division of Computer Research and Technology
- * National Institutes of Health
- * Bethesda, Maryland 20892
- * uucp: uunet!nih-csl!kgorlen
- * Internet: kgorlen@alw.nih.gov
- *
- *---------------------------------------------------------------------------
- */
- static const char SccsId[]=
- "@(#)String.cxx /main/hindenburg/1 (1.7) (1.5)\t09 Jan 1996 Copyright 1993 Westmount Technology";
-
- #include "String.hxx"
-
- #include <stdlib.h>
- #include <string.h>
- #include <iostream.h>
- /* ctype used in an NLS-safe way! */
- #include <ctype.h>
- #include <malloc.h>
- #include <new.h>
- #include <assert.h>
-
- /*
- ** MSVC MFC output
- */
- #ifdef _MSC_VER
- #define NOCRTOUTPUT 1
- #endif
-
- /*
- ** Allocation
- */
-
- static void
- insufficient_memory(void)
- {
- void (*old_new_handler)(void) = set_new_handler(0);
- if ( !old_new_handler ) {
- #ifndef NOCRTOUTPUT
- cerr << "[INSUFMEM] No stringspace left" << endl;
- #endif
- abort();
- }
-
- (*old_new_handler)();
- set_new_handler(old_new_handler);
- return;
- }
-
- static char*
- new_String(size_t sz)
- {
- void *buf = malloc(sz);
- if ( buf )
- return (char*)buf;
-
- insufficient_memory();
- return NULL;
- }
-
- static char*
- string_realloc(char* p, size_t sz)
- {
- void *buf = realloc(p, sz);
- if ( buf )
- return (char*)buf;
-
- insufficient_memory();
- return NULL;
- }
-
-
/* Smaller of two unsigned values (a is returned when they are equal). */
inline unsigned int
MIN(unsigned int a, unsigned int b)
{
    if ( b < a )
        return b;
    return a;
}
-
-
- /* System-independent versions of toupper and tolower */
-
/* Upper-case c if it is a lower-case letter, otherwise return it unchanged. */
inline char
to_upper(unsigned char c)
{
    if ( islower((int)c) )
        return toupper((int)c);
    return c;
}
-
/* Lower-case c if it is an upper-case letter, otherwise return it unchanged. */
inline char
to_lower(unsigned char c)
{
    if ( isupper((int)c) )
        return tolower((int)c);
    return c;
}
-
-
/*
** Remainder of i divided by sizeof(int).
** (Masking with sizeof(int)-1 gives the same value whenever
** sizeof(int) is a power of two, so a plain % covers both cases.)
*/
inline unsigned
mod_sizeof_int(unsigned i)
{
    return (unsigned)( i % sizeof(int) );
}
-
/*
** Number of whole ints that fit in i bytes.
** The previous "i >> 2" was only right when sizeof(int) == 4; the
** division agrees with mod_sizeof_int() on every platform and is
** reduced to the same shift by the compiler where that applies.
*/
inline unsigned
div_sizeof_int(unsigned i)
{
    return (unsigned)( i / sizeof(int) );
}
-
-
- /*
- ** String representation
- */
-
struct fakeRep
/*
** Hack to circumvent initialization (ordering) problems:
** theEmptyRep should be fully initialized before any (global)
** empty String is created.
**
** fakeRep is a plain aggregate, so an instance can be set up with a
** brace initializer; the address of that instance is cast to
** StringRep* further down.  sanity_check() asserts that both types
** have the same size.
** NOTE(review): the members presumably mirror StringRep's refcount,
** len and p, in that order -- confirm against String.hxx.
*/
{
    unsigned rc;    // presumably the reference count
    unsigned ln;    // presumably the length, not counting the '\0'
    char* p;        // character buffer
};
-
- static int sanity_check(void)
- {
- assert( sizeof(fakeRep) == sizeof(StringRep) );
- return 1;
- }
-
- static int fake_ok = sanity_check();
- static fakeRep empty_rep = { 1, 0, "" };
- StringRep* const String::theEmptyRep = (StringRep* const)& empty_rep;
-
-
- StringRep::StringRep(void):
- refcount (1),
- len (0)
- {
- p = new_String(1);
- p[0] = '\0';
- }
-
-
- StringRep::StringRep(const StringRep& sr):
- refcount (1),
- len (sr.len)
- {
- p = new_String(len+1);
- strcpy(p, sr.p);
- }
-
-
- StringRep::StringRep(const char* cp):
- refcount (1),
- len (strlen(cp))
- {
- p = new_String(len+1);
- strcpy(p, cp);
- }
-
-
- StringRep::StringRep(char* buf, unsigned buflen):
- refcount (1),
- len (buflen-1),
- p (buf)
- { /* empty */ }
-
-
- StringRep::~StringRep(void)
- {
- free(p);
- }
-
-
- const StringRep&
- StringRep::operator= (const StringRep& sr)
- {
- if ( &sr != this ) {
- refcount = 1;
- len = sr.len;
- p = string_realloc(p, len+1);
- strcpy(p, sr.p);
- }
- return *this;
- }
-
-
- unsigned
- StringRep::hash() const
- {
- register unsigned h = len;
- register unsigned i = div_sizeof_int(len);
- register unsigned* q = (unsigned*)p;
- while (i--) h ^= *q++;
- if ((i = mod_sizeof_int(len)) != 0)
- {
- unsigned tailHash = 0;
- memcpy((char*)&tailHash, (char*)q, i);
- h ^= tailHash;
- }
- return h;
- }
-
-
- void
- StringRep::toAscii(void)
- {
- register unsigned i = len;
- register char* q = p;
- while (i--) { *q = toascii((int)((unsigned char)*q)); q++; }
- }
-
- void
- StringRep::toLower()
- {
- register unsigned i = len;
- register char* q = p;
- while (i--) { *q = to_lower(*q); q++; }
- }
-
- void
- StringRep::toUpper()
- {
- register unsigned i = len;
- register char* q = p;
- while (i--) { *q = to_upper(*q); q++; }
- }
-
- void
- StringRep::replace(const Range& r, const char* src, unsigned srclen)
- // Replace a SubString with the argument string
- // Terminology:
- // head: portion of destination String before this SubString
- // tail: portion of destination String after this SubString
- {
- # ifdef DEBUG
- cerr << "replacing " << p << '[' << r << ']' << " by ";
- cerr.write(src, srclen);
- cerr << " ...\n";
- # endif
-
- int overlap = 0; // src overlaps destination string
- int tailDelta = 0; // amount to adjust for tail movement
- char* srcbuf = 0; // buffer to hold src if it overlaps SubString
-
- if (src >= p && src <= &p[len]) {
- // src overlaps destination string
- overlap = 1;
- if (src > &p[r.lastIndex()]) {
- // src overlaps only tail of destination string
- tailDelta = srclen-r.length();
- }
- else {
- if (src+srclen > &p[r.firstIndex()]) {
- // src overlaps the SubString: move src to buffer
- srcbuf = new char[srclen];
- strncpy(srcbuf,src,srclen);
- src = srcbuf;
- overlap = 0; // no overlap now
- }
- }
- }
-
- # ifdef DEBUG
- cerr << "overlap=" << overlap
- << " tailDelta=" << tailDelta
- << " srcbuf=" << (const void*)srcbuf << '\n';
- # endif
-
- char* old_p = p;
- unsigned new_len = len + srclen - r.length();
- if ( new_len > len ) // stretch
- p = string_realloc(p, new_len + 1);
-
- if ( overlap ) src += p - old_p;
-
- if ( srclen < r.length() ) {
- // shift tail down
- register const char* src_p = &p[r.lastIndex()+1];
- register char* dst_p = &p[r.firstIndex()+srclen];
- while (*dst_p++ = *src_p++);
- }
- else if ( srclen > r.length() ) {
- // shift tail up
- register const char* src_p = &p[len];
- register char* dst_p = &p[new_len];
- register unsigned n = len - r.lastIndex();
- while (n--) *dst_p-- = *src_p--;
- }
- src += tailDelta;
-
- if ( new_len < len ) {
- // shrink
- p = string_realloc(p, new_len+1);
- p[new_len] = '\0';
- }
- len = new_len;
-
- # ifdef DEBUG
- cerr << "target " << *this << " source ";
- cerr.write(src, srclen);
- cerr << endl;
- # endif
-
- strncpy(&p[r.firstIndex()], src, srclen); // insert src into destination
- delete srcbuf;
-
- # ifdef DEBUG
- cerr << "... result: " << *this << '\n';
- # endif
- }
-
-
- void
- StringRep::append(const char* src, unsigned srclen)
- // Append src to buffer
- {
- p = string_realloc(p, len+srclen+1);
- strncpy(&p[len], src, srclen);
- len += srclen;
- p[len] = '\0';
- }
-
-
-
- //==== SubString functions:
-
- /*
- The following compare functions were implemented because strncmp is
- not adequate for comparing character strings of unequal length. For
- example, strncmp("abc","abcd",3) will return 0.
- */
-
- int
- SubString::compare(const char* cs) const
- /*
- Return integer greater than, equal to, or less than 0, according as
- this SubString is lexicographically greater than, equal to, or less
- than cs.
- */
- {
- int cl = strlen(cs);
- int result = strncmp(ptr(), cs, length());
- if (result != 0 || length() == cl) return result;
- return (length()>cl ? 1 : -1);
- }
-
- int
- SubString::compare(const String& s) const
- /*
- Return integer greater than, equal to, or less than 0, according as
- this SubString is lexicographically greater than, equal to, or less
- than s.
- */
- {
- int result = strncmp(ptr(), s, length());
- if (result != 0 || length() == s.length()) return result;
- return (length()>s.length() ? 1 : -1);
- }
-
- int
- SubString::compare(const SubString& ss) const
- /*
- Return integer greater than, equal to, or less than 0, according as
- this SubString is lexicographically greater than, equal to, or less
- than SubString ss.
- */
- {
- int result = strncmp(ptr(), ss.ptr(), MIN(length(), ss.length()));
- if (result != 0 || length() == ss.length()) return result;
- return (length()>ss.length() ? 1 : -1);
- }
-
-
/*
** Collate at most n leading characters of s1 and s2 with strcoll()
** (LC_COLLATE category of the current locale); the counterpart of
** strncmp().  A negative n compares the complete strings.
**
** strcoll() only accepts '\0'-terminated strings, so a string that
** has no terminator within its first n characters is copied to a
** temporary buffer and terminated there.  The previous version
** poked a '\0' into the caller's (const) string instead, which
** fails for strings in read-only storage, and it cut off only one
** of the two strings, so two equal n-character prefixes embedded in
** longer strings did not compare equal.
*/
int
strncoll(const char* s1, const char* s2, int n)
{
    if ( n < 0 )
        return strcoll(s1, s2);

    const size_t limit = (size_t)n;

    // memchr() stops at the first '\0', so it never reads past the
    // end of a string that is shorter than limit
    const void* end1 = memchr(s1, '\0', limit);
    const void* end2 = memchr(s2, '\0', limit);

    if ( end1 && end2 )     // both end within n characters: compare in place
        return strcoll(s1, s2);

    const size_t len1 = ( end1 ? (size_t)((const char*)end1 - s1) : limit );
    const size_t len2 = ( end2 ? (size_t)((const char*)end2 - s2) : limit );

    // one allocation holds both terminated prefixes
    char* buf = new char[len1 + len2 + 2];
    char* prefix1 = buf;
    char* prefix2 = buf + len1 + 1;

    memcpy(prefix1, s1, len1);
    prefix1[len1] = '\0';
    memcpy(prefix2, s2, len2);
    prefix2[len2] = '\0';

    const int result = strcoll(prefix1, prefix2);

    delete [] buf;
    return result;
}
-
-
- int
- SubString::collate(const char* cs) const
- /*
- Return integer greater than, equal to, or less than 0, according as
- this SubString is lexicographically greater than, equal to, or less
- than cs (According to the LC_COLLATE category of the current locale).
- */
- {
- int cl = strlen(cs);
- int result = strncoll(ptr(), cs, length());
- if (result != 0 || length() == cl) return result;
- return (length()>cl ? 1 : -1);
- }
-
- int
- SubString::collate(const String& s) const
- /*
- Return integer greater than, equal to, or less than 0, according as
- this SubString is lexicographically greater than, equal to, or less
- than s (According to the LC_COLLATE category of the current locale).
- */
- {
- int result = strncoll(ptr(), s, length());
- if (result != 0 || length() == s.length()) return result;
- return (length()>s.length() ? 1 : -1);
- }
-
- int
- SubString::collate(const SubString& ss) const
- /*
- Return integer greater than, equal to, or less than 0, according as
- this SubString is lexicographically greater than, equal to, or less
- than SubString ss (According to the LC_COLLATE category of the current
- locale).
- */
- {
- // avoid double inline expansion (too complex!?)
- const char* ss_ptr = ss.ptr();
- int ss_len = ss.length();
- int result = strncoll(ptr(), ss_ptr, MIN(length(), ss_len));
- if (result != 0 || length() == ss_len) return result;
- return (length()>ss_len ? 1 : -1);
- }
-
- SubString::SubString(const String& s, const Range& r):
- st ((String&)s),
- sr (r)
- {
- checkSubStr();
- }
-
- void
- SubString::dumpOn(ostream& strm) const
- // Dump this SubString on output stream strm.
- {
- strm << String(*this);
- strm << '[' << st;
- strm << '(' << sr;
- strm << ")]";
- }
-
- void
- SubString::printOn(ostream& strm) const
- // Print this SubString on output stream strm.
- {
- strm << String(*this);
- }
-
-
static char* scratch_buf = NULL; // scratch buffer: holds the characters of the most recent operator+ result until the next operator+ call replaces it

/*
** operator+ duplicates code to avoid creation of
** unnecessary temporary strings
*/
-
- String
- SubString::operator+(const SubString& ss) const
- {
- unsigned l1 = length();
- unsigned l2 = ss.length();
-
- delete scratch_buf; // previous result
- scratch_buf = new char[l1 + l2 + 1];
-
- strncpy (scratch_buf, ptr(), l1);
- strncpy (&scratch_buf[l1], ss.ptr(), l2);
- scratch_buf[l1+l2] = '\0';
- return String(scratch_buf);
- }
-
- String
- SubString::operator+(const String& s) const
- {
- unsigned l1 = length();
- unsigned l2 = s.length();
-
- delete scratch_buf; // previous result
- scratch_buf = new char[l1 + l2 + 1];
-
- strncpy (scratch_buf, ptr(), l1);
- strncpy (&scratch_buf[l1], s, l2);
- scratch_buf[l1+l2] = '\0';
- return String(scratch_buf);
- }
-
- String
- SubString::operator+(const char* cs) const
- {
- unsigned l1 = length();
- unsigned l2 = strlen(cs);
-
- delete scratch_buf; // previous result
- scratch_buf = new char[l1 + l2 + 1];
-
- strncpy (scratch_buf, ptr(), l1);
- strncpy (&scratch_buf[l1], cs, l2);
- scratch_buf[l1+l2] = '\0';
- return String(scratch_buf);
- }
-
- String
- SubString::operator+(char c) const
- {
- unsigned l1 = length();
-
- delete scratch_buf; // previous result
- scratch_buf = new char[l1 + 1 + 1];
-
- strncpy (scratch_buf, ptr(), l1);
- scratch_buf[l1] = c;
- scratch_buf[l1+1] = '\0';
- return String(scratch_buf);
- }
-
- String
- operator+(const char* cs, const SubString& ss)
- {
- unsigned l1 = strlen(cs);
- unsigned l2 = ss.length();
-
- delete scratch_buf; // previous result
- scratch_buf = new char[l1 + l2 + 1];
-
- strncpy (scratch_buf, cs, l1);
- strncpy (&scratch_buf[l1], ss.ptr(), l2);
- scratch_buf[l1+l2] = '\0';
- return String(scratch_buf);
- }
-
- String
- operator+(char c, const SubString& ss)
- {
- unsigned l2 = ss.length();
-
- delete scratch_buf; // previous result
- scratch_buf = new char[1 + l2 + 1];
-
- scratch_buf[0] = c;
- strncpy (&scratch_buf[1], ss.ptr(), l2);
- scratch_buf[1+l2] = '\0';
- return String(scratch_buf);
- }
-
- void
- SubString::checkSubStr() const
- // check for legal SubString
- {
- if ( sr.valid() && sr.lastIndex() <= (int)st.rep->buf_len() )
- return;
-
- #ifndef NOCRTOUTPUT
- cerr << "[SUBSTRERR] Substring position/lenght out of Range ("
- << sr.firstIndex() << ',';
- cerr << sr.length() << ')'
- << endl;
- #endif
- abort();
- }
-
- //==== String functions:
-
- void
- String::indexRangeErr() const
- {
- #ifndef NOCRTOUTPUT
- cerr << "[INDEXRANGE] Index out of Range: string@"
- << (void*)this
- << endl;
- #endif
- abort();
- }
-
- void
- String::unique()
- {
- if ( !rep->shared() ) return;
-
- StringRep* new_rep = new StringRep(*rep);
- rep->removeRef();
- rep = new_rep;
- }
-
- //==== String Constructors:
-
- String::String()
- {
- rep = theEmptyRep;
- rep->addRef();
- }
-
- String::String(const char* cs)
- {
- if ( cs[0] == '\0' ) {
- rep = theEmptyRep;
- rep->addRef();
- }
- else
- rep = new StringRep(cs);
- }
-
- String::String(const String& s):
- rep (s.rep)
- {
- rep->addRef();
- }
-
- String::String(const char c, unsigned l)
- {
- assert( l != 1 || isprint(c) );
-
- char* new_buf = new_String(l+1);
- register char* p = &new_buf[l+1];
- *--p = '\0';
- while ( p > new_buf ) *--p = c;
-
- rep = new StringRep(new_buf, l+1);
- }
-
- String::String(const char c)
- {
- char* new_buf = new_String(2);
- new_buf[0] = c;
- new_buf[1] = '\0';
- rep = new StringRep(new_buf, 2);
- }
-
- String::String(const SubString& ss)
- {
- unsigned buflen = ss.length();
- char* new_buf = new_String(buflen+1);
- strncpy(new_buf, ss.ptr(), buflen);
- new_buf[buflen] = '\0';
- rep = new StringRep(new_buf, buflen+1);
- }
-
- String::~String()
- {
- rep->removeRef();
- }
-
- //==== Operators:
-
- SubString
- String::operator()(const Range& r)
- {
- if ( ! r.valid() ) {
- #ifndef NOCRTOUTPUT
- cerr << "[BADRANGE] Invalid or undefined Range ["
- << r
- << "] for SubString" << endl;
- #endif
- abort();
- }
- return SubString(*this, r);
- }
-
- const SubString
- String::operator()(const Range& r) const
- {
- if ( ! r.valid() ) {
- #ifndef NOCRTOUTPUT
- cerr << "[BADRANGE] Invalid or undefined Range ["
- << r
- << "] for SubString" << endl;
- #endif
- abort();
- }
- return SubString(*this, r);
- }
-
- void
- String::operator=(const String& s)
- {
- if ( &s == this ) return;
- rep->removeRef();
- rep = s.rep;
- rep->addRef();
- }
-
- void
- String::operator=(const SubString& ss)
- {
- unsigned ln = ss.length();
- char* new_buf = new_String(ln+1);
- strncpy(new_buf, ss.ptr(), ln);
- new_buf[ln] = '\0';
- rep->removeRef();
- rep = new StringRep(new_buf, ln+1);
- }
-
- void
- String::operator=(const char* cs)
- {
- rep->removeRef();
- rep = new StringRep(cs);
- }
-
- String
- String::operator+(const String& s) const
- {
- unsigned l1 = length();
- unsigned l2 = s.length();
-
- delete scratch_buf;
- scratch_buf = new char[l1+l2+1];
- strcpy(scratch_buf, rep->buf_ptr());
- strcpy(&scratch_buf[l1], s.rep->buf_ptr());
-
- return String(scratch_buf);
- }
-
- String
- String::operator+(const SubString& ss) const
- {
- unsigned l1 = length();
- unsigned l2 = ss.length();
-
- delete scratch_buf;
- scratch_buf = new char[l1+l2+1];
- strcpy(scratch_buf, rep->buf_ptr());
- strncpy(&scratch_buf[l1], ss.ptr(), l2);
- scratch_buf[l1+l2] = '\0';
-
- return String(scratch_buf);
- }
-
- String
- String::operator+(const char* cs) const
- {
- unsigned l1 = length();
- unsigned l2 = strlen(cs);
-
- delete scratch_buf;
- scratch_buf = new char[l1+l2+1];
- strcpy(scratch_buf, rep->buf_ptr());
- strcpy(&scratch_buf[l1], cs);
-
- return String(scratch_buf);
- }
-
- String
- String::operator+(char c) const
- {
- unsigned l1 = length();
-
- delete scratch_buf;
- scratch_buf = new char[l1+1+1];
- strcpy(scratch_buf, rep->buf_ptr());
- scratch_buf[l1] = c;
- scratch_buf[l1+1] = '\0';
-
- return String(scratch_buf);
- }
-
- String
- operator+(const char* cs, const String& s)
- {
- unsigned l1 = strlen(cs);
- unsigned l2 = s.length();
-
- delete scratch_buf;
- scratch_buf = new char[l1+l2+1];
- strcpy(scratch_buf, cs);
- strcpy(&scratch_buf[l1], s);
-
- return String(scratch_buf);
- }
-
- String
- operator+(char c, const String& s)
- {
- unsigned l2 = s.length();
-
- delete scratch_buf;
- scratch_buf = new char[1+l2+1];
- scratch_buf[0] = c;
- strcpy(&scratch_buf[1], s);
-
- return String(scratch_buf);
- }
-
- void
- String::operator+=(const String& s)
- // Concatenate a String with another
- {
- unique();
- rep->append(s, s.length());
- }
-
- void
- String::operator+=(const SubString& ss)
- {
- unique();
- rep->append(ss.ptr(), ss.length());
- }
-
- void
- String::operator+=(const char* cs)
- {
- unique();
- rep->append(cs, strlen(cs));
- }
-
- void
- String::operator+=(char c)
- {
- unique();
- rep->append(&c, 1);
- }
-
- char&
- String::operator[] (unsigned i)
- {
- if (i >= length()) indexRangeErr();
- unique();
- return at(i);
- }
-
- char
- String::operator[] (unsigned i) const
- {
- if (i >= length()) indexRangeErr();
- return rep->buf_ptr()[i];
- }
-
-
- int
- String::index(char c, unsigned start_pos) const
- {
- if ( c == '\0' || start_pos > length() ) return -1;
-
- const char* where = ::strchr(&rep->buf_ptr()[start_pos],c);
- return ( where ? where - rep->buf_ptr() : -1 );
- }
-
-
- int
- String::rindex(char c, unsigned start_pos) const
- {
- register const char* where;
-
- if ( start_pos > length() ) start_pos = length();
-
- for ( where = &rep->buf_ptr()[start_pos]; where > rep->buf_ptr(); where-- )
- {
- if ( *where == c ) {
- return (where - rep->buf_ptr());
- }
- }
- /* where == rep->p */
- if ( *where == c )
- return 0;
-
- return -1;
- }
-
-
- void
- String::printOn(ostream& strm) const
- {
- strm << rep->buf_ptr();
- }
-
- void
- String::scanFrom(istream& strm)
- // Read next line of input from strm into this string.
- {
- const INPUTBUFSIZE = 512;
-
- ostream* os = strm.tie((ostream*)0);
- if (os != 0) {
- os->flush();
- strm.tie(os);
- }
- char c;
- strm.get(c);
- if (c != '\n') strm.putback(c);
- char temp[INPUTBUFSIZE+1];
- strm.get(temp,INPUTBUFSIZE+1);
- *this = String(temp);
-
- while ( strm.gcount() >= INPUTBUFSIZE ) {
- strm.get(temp,INPUTBUFSIZE+1);
- *this += temp;
- }
- }
-
-