PC World 1997 November

home *** CD-ROM | disk | FTP | other *** search

/ PC World 1997 November / PCWorld_1997-11_cd.bin / software / programy / komix / DATA.Z / String.cxx < prev next >

Wrap

C/C++ Source or Header | 1997-06-05 | 19.7 KB | 931 lines

/*--------------------------------------------------------------------------- * * Copyright (c) 1993 by Westmount Technology B.V., Delft, The Netherlands. * * This software is furnished under a license and may be used only in * accordance with the terms of such license and with the inclusion of * the above copyright notice. This software or any other copies thereof * may not be provided or otherwise made available to any other person. * No title to and ownership of the software is hereby transferred. * * The information in this software is subject to change without notice * and should not be construed as a commitment by Westmount Technology B.V. * *--------------------------------------------------------------------------- * * File : @(#)String.cxx /main/hindenburg/1 (1.7) (1.5) * Author : NIH, erel * Original date : * Description : Class String implements reference counted character * String objects. Operations provided include + (concatenation) * and () (SubString extraction). Type conversions between * String and char* are provided, permitting the two to be used * interchangeably in many contexts. * History : March 1993: addded reference counting * * Originally derived from String in the NIH-classes * Authors: * C. J. Eppich and K. E. Gorlen * Bg. 12A, Rm. 2033 * Computer Systems Laboratory * Division of Computer Research and Technology * National Institutes of Health * Bethesda, Maryland 20892 * uucp: uunet!nih-csl!kgorlen * Internet: kgorlen@alw.nih.gov * *--------------------------------------------------------------------------- */ static const char SccsId[]= "@(#)String.cxx /main/hindenburg/1 (1.7) (1.5)\t09 Jan 1996 Copyright 1993 Westmount Technology"; #include "String.hxx" #include <stdlib.h> #include <string.h> #include <iostream.h> /* ctype used in an NLS-safe way! */ #include <ctype.h> #include <malloc.h> #include <new.h> #include <assert.h> /* ** MSVC MFC output */ #ifdef _MSC_VER #define NOCRTOUTPUT 1 #endif /* ** Allocation */ static void insufficient_memory(void) { void (*old_new_handler)(void) = set_new_handler(0); if ( !old_new_handler ) { #ifndef NOCRTOUTPUT cerr << "[INSUFMEM] No stringspace left" << endl; #endif abort(); } (*old_new_handler)(); set_new_handler(old_new_handler); return; } static char* new_String(size_t sz) { void *buf = malloc(sz); if ( buf ) return (char*)buf; insufficient_memory(); return NULL; } static char* string_realloc(char* p, size_t sz) { void *buf = realloc(p, sz); if ( buf ) return (char*)buf; insufficient_memory(); return NULL; } inline unsigned int MIN(unsigned int a, unsigned int b) { return a <= b ? a : b; } /* System-independent versions of toupper and tolower */ inline char to_upper(unsigned char c) { return (islower((int)c) ? toupper((int)c): c); } inline char to_lower(unsigned char c) { return (isupper((int)c) ? tolower((int)c): c); } inline unsigned mod_sizeof_int(unsigned i) { return sizeof(int)&sizeof(int)-1 ? i%sizeof(int) : i&sizeof(int)-1; } inline unsigned div_sizeof_int(unsigned i) { return i >> 2; } /* ** String representation */ struct fakeRep /* ** Hack to circumvent initialization (ordering) problems: ** theEmptyRep should be fully initialized before any (global) ** empty String is created. */ { unsigned rc; unsigned ln; char* p; }; static int sanity_check(void) { assert( sizeof(fakeRep) == sizeof(StringRep) ); return 1; } static int fake_ok = sanity_check(); static fakeRep empty_rep = { 1, 0, "" }; StringRep* const String::theEmptyRep = (StringRep* const)& empty_rep; StringRep::StringRep(void): refcount (1), len (0) { p = new_String(1); p[0] = '\0'; } StringRep::StringRep(const StringRep& sr): refcount (1), len (sr.len) { p = new_String(len+1); strcpy(p, sr.p); } StringRep::StringRep(const char* cp): refcount (1), len (strlen(cp)) { p = new_String(len+1); strcpy(p, cp); } StringRep::StringRep(char* buf, unsigned buflen): refcount (1), len (buflen-1), p (buf) { /* empty */ } StringRep::~StringRep(void) { free(p); } const StringRep& StringRep::operator= (const StringRep& sr) { if ( &sr != this ) { refcount = 1; len = sr.len; p = string_realloc(p, len+1); strcpy(p, sr.p); } return *this; } unsigned StringRep::hash() const { register unsigned h = len; register unsigned i = div_sizeof_int(len); register unsigned* q = (unsigned*)p; while (i--) h ^= *q++; if ((i = mod_sizeof_int(len)) != 0) { unsigned tailHash = 0; memcpy((char*)&tailHash, (char*)q, i); h ^= tailHash; } return h; } void StringRep::toAscii(void) { register unsigned i = len; register char* q = p; while (i--) { *q = toascii((int)((unsigned char)*q)); q++; } } void StringRep::toLower() { register unsigned i = len; register char* q = p; while (i--) { *q = to_lower(*q); q++; } } void StringRep::toUpper() { register unsigned i = len; register char* q = p; while (i--) { *q = to_upper(*q); q++; } } void StringRep::replace(const Range& r, const char* src, unsigned srclen) // Replace a SubString with the argument string // Terminology: // head: portion of destination String before this SubString // tail: portion of destination String after this SubString { # ifdef DEBUG cerr << "replacing " << p << '[' << r << ']' << " by "; cerr.write(src, srclen); cerr << " ...\n"; # endif int overlap = 0; // src overlaps destination string int tailDelta = 0; // amount to adjust for tail movement char* srcbuf = 0; // buffer to hold src if it overlaps SubString if (src >= p && src <= &p[len]) { // src overlaps destination string overlap = 1; if (src > &p[r.lastIndex()]) { // src overlaps only tail of destination string tailDelta = srclen-r.length(); } else { if (src+srclen > &p[r.firstIndex()]) { // src overlaps the SubString: move src to buffer srcbuf = new char[srclen]; strncpy(srcbuf,src,srclen); src = srcbuf; overlap = 0; // no overlap now } } } # ifdef DEBUG cerr << "overlap=" << overlap << " tailDelta=" << tailDelta << " srcbuf=" << (const void*)srcbuf << '\n'; # endif char* old_p = p; unsigned new_len = len + srclen - r.length(); if ( new_len > len ) // stretch p = string_realloc(p, new_len + 1); if ( overlap ) src += p - old_p; if ( srclen < r.length() ) { // shift tail down register const char* src_p = &p[r.lastIndex()+1]; register char* dst_p = &p[r.firstIndex()+srclen]; while (*dst_p++ = *src_p++); } else if ( srclen > r.length() ) { // shift tail up register const char* src_p = &p[len]; register char* dst_p = &p[new_len]; register unsigned n = len - r.lastIndex(); while (n--) *dst_p-- = *src_p--; } src += tailDelta; if ( new_len < len ) { // shrink p = string_realloc(p, new_len+1); p[new_len] = '\0'; } len = new_len; # ifdef DEBUG cerr << "target " << *this << " source "; cerr.write(src, srclen); cerr << endl; # endif strncpy(&p[r.firstIndex()], src, srclen); // insert src into destination delete srcbuf; # ifdef DEBUG cerr << "... result: " << *this << '\n'; # endif } void StringRep::append(const char* src, unsigned srclen) // Append src to buffer { p = string_realloc(p, len+srclen+1); strncpy(&p[len], src, srclen); len += srclen; p[len] = '\0'; } //==== SubString functions: /* The following compare functions were implemented because strncmp is not adequate for comparing character strings of unequal length. For example, strncmp("abc","abcd",3) will return 0. */ int SubString::compare(const char* cs) const /* Return integer greater than, equal to, or less than 0, according as this SubString is lexicographically greater than, equal to, or less than cs. */ { int cl = strlen(cs); int result = strncmp(ptr(), cs, length()); if (result != 0 || length() == cl) return result; return (length()>cl ? 1 : -1); } int SubString::compare(const String& s) const /* Return integer greater than, equal to, or less than 0, according as this SubString is lexicographically greater than, equal to, or less than s. */ { int result = strncmp(ptr(), s, length()); if (result != 0 || length() == s.length()) return result; return (length()>s.length() ? 1 : -1); } int SubString::compare(const SubString& ss) const /* Return integer greater than, equal to, or less than 0, according as this SubString is lexicographically greater than, equal to, or less than SubString ss. */ { int result = strncmp(ptr(), ss.ptr(), MIN(length(), ss.length())); if (result != 0 || length() == ss.length()) return result; return (length()>ss.length() ? 1 : -1); } int strncoll(const char* s1, const char* s2, int n) { char *replace_p = NULL; char replace_c = '\0'; if ( strlen(s1) > n ) replace_p = (char*)(&s1[n]); else if ( strlen(s2) > n ) replace_p = (char*)(&s2[n]); if ( replace_p ) { replace_c = *replace_p; *replace_p = '\0'; } // Here at least one String is not longer than n // so we can strcoll() them. // int result = strcoll(s1, s2); // Repair fumbled string if ( replace_p ) *replace_p = replace_c; return result; } int SubString::collate(const char* cs) const /* Return integer greater than, equal to, or less than 0, according as this SubString is lexicographically greater than, equal to, or less than cs (According to the LC_COLLATE category of the current locale). */ { int cl = strlen(cs); int result = strncoll(ptr(), cs, length()); if (result != 0 || length() == cl) return result; return (length()>cl ? 1 : -1); } int SubString::collate(const String& s) const /* Return integer greater than, equal to, or less than 0, according as this SubString is lexicographically greater than, equal to, or less than s (According to the LC_COLLATE category of the current locale). */ { int result = strncoll(ptr(), s, length()); if (result != 0 || length() == s.length()) return result; return (length()>s.length() ? 1 : -1); } int SubString::collate(const SubString& ss) const /* Return integer greater than, equal to, or less than 0, according as this SubString is lexicographically greater than, equal to, or less than SubString ss (According to the LC_COLLATE category of the current locale). */ { // avoid double inline expansion (too complex!?) const char* ss_ptr = ss.ptr(); int ss_len = ss.length(); int result = strncoll(ptr(), ss_ptr, MIN(length(), ss_len)); if (result != 0 || length() == ss_len) return result; return (length()>ss_len ? 1 : -1); } SubString::SubString(const String& s, const Range& r): st ((String&)s), sr (r) { checkSubStr(); } void SubString::dumpOn(ostream& strm) const // Dump this SubString on output stream strm. { strm << String(*this); strm << '[' << st; strm << '(' << sr; strm << ")]"; } void SubString::printOn(ostream& strm) const // Print this SubString on output stream strm. { strm << String(*this); } static char* scratch_buf = NULL; // scratch buffer /* ** operator+ duplicates code to avoid creation of ** unneccessary temporary strings */ String SubString::operator+(const SubString& ss) const { unsigned l1 = length(); unsigned l2 = ss.length(); delete scratch_buf; // previous result scratch_buf = new char[l1 + l2 + 1]; strncpy (scratch_buf, ptr(), l1); strncpy (&scratch_buf[l1], ss.ptr(), l2); scratch_buf[l1+l2] = '\0'; return String(scratch_buf); } String SubString::operator+(const String& s) const { unsigned l1 = length(); unsigned l2 = s.length(); delete scratch_buf; // previous result scratch_buf = new char[l1 + l2 + 1]; strncpy (scratch_buf, ptr(), l1); strncpy (&scratch_buf[l1], s, l2); scratch_buf[l1+l2] = '\0'; return String(scratch_buf); } String SubString::operator+(const char* cs) const { unsigned l1 = length(); unsigned l2 = strlen(cs); delete scratch_buf; // previous result scratch_buf = new char[l1 + l2 + 1]; strncpy (scratch_buf, ptr(), l1); strncpy (&scratch_buf[l1], cs, l2); scratch_buf[l1+l2] = '\0'; return String(scratch_buf); } String SubString::operator+(char c) const { unsigned l1 = length(); delete scratch_buf; // previous result scratch_buf = new char[l1 + 1 + 1]; strncpy (scratch_buf, ptr(), l1); scratch_buf[l1] = c; scratch_buf[l1+1] = '\0'; return String(scratch_buf); } String operator+(const char* cs, const SubString& ss) { unsigned l1 = strlen(cs); unsigned l2 = ss.length(); delete scratch_buf; // previous result scratch_buf = new char[l1 + l2 + 1]; strncpy (scratch_buf, cs, l1); strncpy (&scratch_buf[l1], ss.ptr(), l2); scratch_buf[l1+l2] = '\0'; return String(scratch_buf); } String operator+(char c, const SubString& ss) { unsigned l2 = ss.length(); delete scratch_buf; // previous result scratch_buf = new char[1 + l2 + 1]; scratch_buf[0] = c; strncpy (&scratch_buf[1], ss.ptr(), l2); scratch_buf[1+l2] = '\0'; return String(scratch_buf); } void SubString::checkSubStr() const // check for legal SubString { if ( sr.valid() && sr.lastIndex() <= (int)st.rep->buf_len() ) return; #ifndef NOCRTOUTPUT cerr << "[SUBSTRERR] Substring position/lenght out of Range (" << sr.firstIndex() << ','; cerr << sr.length() << ')' << endl; #endif abort(); } //==== String functions: void String::indexRangeErr() const { #ifndef NOCRTOUTPUT cerr << "[INDEXRANGE] Index out of Range: string@" << (void*)this << endl; #endif abort(); } void String::unique() { if ( !rep->shared() ) return; StringRep* new_rep = new StringRep(*rep); rep->removeRef(); rep = new_rep; } //==== String Constructors: String::String() { rep = theEmptyRep; rep->addRef(); } String::String(const char* cs) { if ( cs[0] == '\0' ) { rep = theEmptyRep; rep->addRef(); } else rep = new StringRep(cs); } String::String(const String& s): rep (s.rep) { rep->addRef(); } String::String(const char c, unsigned l) { assert( l != 1 || isprint(c) ); char* new_buf = new_String(l+1); register char* p = &new_buf[l+1]; *--p = '\0'; while ( p > new_buf ) *--p = c; rep = new StringRep(new_buf, l+1); } String::String(const char c) { char* new_buf = new_String(2); new_buf[0] = c; new_buf[1] = '\0'; rep = new StringRep(new_buf, 2); } String::String(const SubString& ss) { unsigned buflen = ss.length(); char* new_buf = new_String(buflen+1); strncpy(new_buf, ss.ptr(), buflen); new_buf[buflen] = '\0'; rep = new StringRep(new_buf, buflen+1); } String::~String() { rep->removeRef(); } //==== Operators: SubString String::operator()(const Range& r) { if ( ! r.valid() ) { #ifndef NOCRTOUTPUT cerr << "[BADRANGE] Invalid or undefined Range [" << r << "] for SubString" << endl; #endif abort(); } return SubString(*this, r); } const SubString String::operator()(const Range& r) const { if ( ! r.valid() ) { #ifndef NOCRTOUTPUT cerr << "[BADRANGE] Invalid or undefined Range [" << r << "] for SubString" << endl; #endif abort(); } return SubString(*this, r); } void String::operator=(const String& s) { if ( &s == this ) return; rep->removeRef(); rep = s.rep; rep->addRef(); } void String::operator=(const SubString& ss) { unsigned ln = ss.length(); char* new_buf = new_String(ln+1); strncpy(new_buf, ss.ptr(), ln); new_buf[ln] = '\0'; rep->removeRef(); rep = new StringRep(new_buf, ln+1); } void String::operator=(const char* cs) { rep->removeRef(); rep = new StringRep(cs); } String String::operator+(const String& s) const { unsigned l1 = length(); unsigned l2 = s.length(); delete scratch_buf; scratch_buf = new char[l1+l2+1]; strcpy(scratch_buf, rep->buf_ptr()); strcpy(&scratch_buf[l1], s.rep->buf_ptr()); return String(scratch_buf); } String String::operator+(const SubString& ss) const { unsigned l1 = length(); unsigned l2 = ss.length(); delete scratch_buf; scratch_buf = new char[l1+l2+1]; strcpy(scratch_buf, rep->buf_ptr()); strncpy(&scratch_buf[l1], ss.ptr(), l2); scratch_buf[l1+l2] = '\0'; return String(scratch_buf); } String String::operator+(const char* cs) const { unsigned l1 = length(); unsigned l2 = strlen(cs); delete scratch_buf; scratch_buf = new char[l1+l2+1]; strcpy(scratch_buf, rep->buf_ptr()); strcpy(&scratch_buf[l1], cs); return String(scratch_buf); } String String::operator+(char c) const { unsigned l1 = length(); delete scratch_buf; scratch_buf = new char[l1+1+1]; strcpy(scratch_buf, rep->buf_ptr()); scratch_buf[l1] = c; scratch_buf[l1+1] = '\0'; return String(scratch_buf); } String operator+(const char* cs, const String& s) { unsigned l1 = strlen(cs); unsigned l2 = s.length(); delete scratch_buf; scratch_buf = new char[l1+l2+1]; strcpy(scratch_buf, cs); strcpy(&scratch_buf[l1], s); return String(scratch_buf); } String operator+(char c, const String& s) { unsigned l2 = s.length(); delete scratch_buf; scratch_buf = new char[1+l2+1]; scratch_buf[0] = c; strcpy(&scratch_buf[1], s); return String(scratch_buf); } void String::operator+=(const String& s) // Concatenate a String with another { unique(); rep->append(s, s.length()); } void String::operator+=(const SubString& ss) { unique(); rep->append(ss.ptr(), ss.length()); } void String::operator+=(const char* cs) { unique(); rep->append(cs, strlen(cs)); } void String::operator+=(char c) { unique(); rep->append(&c, 1); } char& String::operator[] (unsigned i) { if (i >= length()) indexRangeErr(); unique(); return at(i); } char String::operator[] (unsigned i) const { if (i >= length()) indexRangeErr(); return rep->buf_ptr()[i]; } int String::index(char c, unsigned start_pos) const { if ( c == '\0' || start_pos > length() ) return -1; const char* where = ::strchr(&rep->buf_ptr()[start_pos],c); return ( where ? where - rep->buf_ptr() : -1 ); } int String::rindex(char c, unsigned start_pos) const { register const char* where; if ( start_pos > length() ) start_pos = length(); for ( where = &rep->buf_ptr()[start_pos]; where > rep->buf_ptr(); where-- ) { if ( *where == c ) { return (where - rep->buf_ptr()); } } /* where == rep->p */ if ( *where == c ) return 0; return -1; } void String::printOn(ostream& strm) const { strm << rep->buf_ptr(); } void String::scanFrom(istream& strm) // Read next line of input from strm into this string. { const INPUTBUFSIZE = 512; ostream* os = strm.tie((ostream*)0); if (os != 0) { os->flush(); strm.tie(os); } char c; strm.get(c); if (c != '\n') strm.putback(c); char temp[INPUTBUFSIZE+1]; strm.get(temp,INPUTBUFSIZE+1); *this = String(temp); while ( strm.gcount() >= INPUTBUFSIZE ) { strm.get(temp,INPUTBUFSIZE+1); *this += temp; } }