home *** CD-ROM | disk | FTP | other *** search
Text File | 1996-03-10 | 3.6 KB | 135 lines | [TEXT/ALFA] |
- // This may look like C code, but it is really -*- C++ -*-
- /*
- ************************************************************************
- *
- * Computing the Histogram
- * and related operations
- * $Id: histogram.cc,v 1.2 1995/05/31 16:55:30 oleg Exp oleg $
- *
- ************************************************************************
- */
-
- #ifdef __GNUC__
- #pragma implementation
- #endif
-
- #include "histogram.h"
- #include "myenv.h"
- #include "std.h"
- #include <math.h>
- #ifndef M_LOG2E
- #define M_LOG2E 1.4426950408889634074
- #endif
-
- // Constructor: constructs a clean histogram
- // ready to accumulate statistics for symbols
- // within [lwb,upb]
- Histogram::Histogram(const Histogram::Symbol lwb, const Histogram::Symbol upb)
- : symbol_lwb(lwb), symbol_upb(upb),
- no_potential_symbols(upb-lwb+1)
- {
- assure(no_potential_symbols > 1,
- "Bracketing interval for input symbols should include at least "
- "two symbols");
- counts = new unsigned int[no_potential_symbols];
- memset(counts,0,sizeof(counts[0])*no_potential_symbols);
- }
-
- // Destructor
- Histogram::~Histogram(void)
- {
- assert( counts != 0 );
- delete [] counts;
- }
-
- // Count a symbol
- void Histogram::put(const Symbol symbol)
- {
- if( symbol < symbol_lwb || symbol > symbol_upb )
- _error("Symbol %d is out of interval [%d,%d]",
- symbol, symbol_lwb, symbol_upb);
- counts[symbol-symbol_lwb] += 1;
- }
-
- // Count a symbol coercing it to the expected
- // range if necessary
- void Histogram::put_coerce(const Symbol symbol)
- {
- counts[ ( symbol < symbol_lwb ? symbol_lwb :
- symbol > symbol_upb ? symbol_upb :
- symbol ) - symbol_lwb ] += 1;
- }
-
- // Return the count for a symbol
- int Histogram::get(const Symbol symbol) const
- {
- if( symbol < symbol_lwb || symbol > symbol_upb )
- _error("Symbol %d is out of interval [%d,%d]",
- symbol, symbol_lwb, symbol_upb);
- return counts[symbol-symbol_lwb];
- }
-
- // Give the no. of distinct symbols, ie
- // symbols with non-zero count
- int Histogram::no_distinct_symbols(void) const
- {
- register int no = 0;
- register unsigned int *cp;
- for(cp=&counts[0]; cp<&counts[no_potential_symbols]; )
- if( *cp++ != 0 )
- no++;
- return no;
- }
-
- // Print the histogram
- void Histogram::print(const char * title) const
- {
- message("\nHistogram '%s'\n"
- "\nValue Quantity\n",title);
- register int i;
- register int no_symbols = 0;
- register int total = 0;
- register unsigned int *cp;
- for(i=symbol_lwb, cp=&counts[0]; i <= symbol_upb; i++,cp++)
- if( *cp != 0 )
- {
- total += *cp;
- no_symbols++;
- message("%2d %5d\n",i,*cp);
- }
- assert( cp == &counts[no_potential_symbols] );
- message("\n****** Total no. of symbols %d, total count %d\n\n",
- no_symbols,total);
- }
-
-
- // Estimate a zero-order entropy of a source
- // represented by the histogram
- // Using Shannon formula
- // log2(f) - SUM[ fi*log2(fi) ]/f
- // where fi is a count for symbol i, f is a total
- // count
- // The formula tells how many bits are necessary
- // to represent a single *average* symbol emitted
- // by the (stationary) source
-
- static inline double log2(const double x) { return log(x) * M_LOG2E; }
-
- double Histogram::estimate_entropy(void) const
- {
- register long total_count = 0;
- register unsigned int *cp;
- for(cp=&counts[0]; cp<&counts[no_potential_symbols]; cp++)
- if( *cp != 0 )
- total_count += *cp;
-
- assert( total_count > 0 );
-
- double accum_log = 0;
- for(cp=&counts[0]; cp<&counts[no_potential_symbols]; cp++)
- if( *cp != 0 )
- accum_log += *cp * log2( *cp );
-
- return log2(total_count) - accum_log/total_count;
- }
-