home *** CD-ROM | disk | FTP | other *** search
Text File | 1996-03-10 | 10.2 KB | 328 lines | [TEXT/ALFA] |
- // This may look like C code, but it is really -*- C++ -*-
- /*
- ************************************************************************
- *
- * Arithmetic Coding
- * Adaptive Histogram-based model for the data source
- *
- *
- * The present file provides the functionality for both building the
- * histogram for a sequence of integers items and supplying the
- * arithmetic codec with probabilities of the items estimated from
- * the histogram.
- *
- * The program uses the histogram to find out which symbols shall really
- * come out and how frequent. It allocates frequency tables only for
- * the symbols that really rather than potentially need to be coded.
- * In all other respects (assuming Lorentzian-like initial probability
- * distribution and tailoring as more symbols become known) the
- * model behaves way the same as the simple AdaptiveModel does.
- *
- * $Id: arithm_modadh.cc,v 2.4 1995/07/08 19:27:16 oleg Exp oleg $
- *
- ************************************************************************
- */
-
- #ifdef __GNUC__
- #pragma implementation "arithm_modadh.h"
- #endif
-
- #include "arithm_modadh.h"
- #include "std.h"
-
-
- /*
- *========================================================================
- * Histogram-based Adaptive model for the data source
- */
-
-
- /*
- *------------------------------------------------------------------------
- * Constructors and Destructors
- */
-
- // Constructor
- AdaptiveHistModel::AdaptiveHistModel(const Histogram& histogram)
- : no_distinct_symbols(histogram.no_distinct_symbols()),
- update_inc(0)
- {
- // Skip leading and tailing dummy symbols
- // (never occured in the histogram)
- for(symbol_lwb=histogram.symbol_lwb;
- symbol_lwb < histogram.symbol_upb && histogram.get(symbol_lwb) == 0;
- symbol_lwb++)
- ;
-
- for(symbol_upb=histogram.symbol_upb;
- symbol_upb > symbol_lwb && histogram.get(symbol_upb) == 0;
- symbol_upb--)
- ;
- assure( symbol_upb > symbol_lwb,
- "there should be at least 2 symbols to code" );
- no_potential_symbols = symbol_upb - symbol_lwb + 1;
- assert( no_potential_symbols >= no_distinct_symbols );
- allocate_model(no_distinct_symbols);
- symbol_to_index = new Index[no_potential_symbols];
- index_to_symbol = new Symbol[no_distinct_symbols];
-
- initialize_model(histogram);
- }
-
- // Constructor for a dummy model. The real
- // model contents is supposed to be read
- // in later
- AdaptiveHistModel::AdaptiveHistModel(void)
- : symbol_lwb(0), symbol_upb(0),
- no_potential_symbols(0),
- no_distinct_symbols(0),
- update_inc(0)
- {
- symbol_to_index = 0;
- index_to_symbol = 0;
- }
-
- // Destructor
- AdaptiveHistModel::~AdaptiveHistModel(void)
- {
- assert( symbol_to_index != 0 && index_to_symbol != 0 );
- delete [] symbol_to_index;
- delete [] index_to_symbol;
- }
-
- /*
- *------------------------------------------------------------------------
- * Sort the histogram and set up the model
- */
-
- static struct Hist_elem
- { unsigned int count; Symbol symbol; }
- * Hist_sorted;
-
- // Comparison routine to sort the Histogram
- // in the descending order
- static int hist_compar(const void * eli, const void * elj)
- {
- if( ((const Hist_elem*)eli)->count < ((const Hist_elem*)elj)->count )
- return 1;
- else if( ((const Hist_elem*)eli)->count == ((const Hist_elem*)elj)->count )
- return 0;
- else
- return -1;
- }
-
- void AdaptiveHistModel::initialize_model(const Histogram& histogram)
- {
- Hist_sorted = new Hist_elem[no_distinct_symbols];
-
- register Symbol symbol;
- register int i;
- for(symbol=histogram.symbol_lwb,i=0; symbol<=histogram.symbol_upb; symbol++)
- {
- unsigned int count = histogram.get(symbol);
- if( count != 0 )
- Hist_sorted[i].count = count, Hist_sorted[i].symbol = symbol, i++;
- }
- assert( i == no_distinct_symbols );
-
- qsort(Hist_sorted,no_distinct_symbols,sizeof(Hist_sorted[0]),hist_compar);
-
- memset(symbol_to_index,0,sizeof(symbol_to_index[0])*no_potential_symbols);
- for(i=0; i<no_distinct_symbols; i++)
- {
- Index index = i+1;
- Symbol symbol = Hist_sorted[i].symbol;
- assert( symbol <= symbol_upb && symbol >= symbol_lwb );
- symbol_to_index[symbol-symbol_lwb] = index;
- index_to_symbol[index-1] = symbol;
- }
- assert( i<EOF_index );
-
- initial_distribution();
- }
-
- // Assign initial frequency counts
- // according to the Lorentzian ditribution
- void AdaptiveHistModel::initial_distribution(void)
- {
- register int i;
- cum_frequencies[no_indices] = 0;
- // update_inc > 1 means the real
- // occurence of a symbol means more
- update_inc = no_indices/10 + 1; // than just a potential occurence.
- // Since all frequencies must be >0,
- // frequency count one can be regarded
- // as being "infinitely small"
-
- // The folowing distribution is
- // certaily ad hoc, but seems to work
- for(i=no_indices; i>0; i--)
- {
- frequencies[i] = ( i <= 2 ? 45*update_inc-30 :
- i <= 12 ? 3*update_inc : 1 );
- cum_frequencies[i-1] = cum_frequencies[i] + frequencies[i];
- }
- frequencies[0] = 0; // Must be zero (an impossible value)
-
-
- }
-
- /*
- *------------------------------------------------------------------------
- * Searching for index/symbol
- */
-
- // Return the index of a given symbol
- Index AdaptiveHistModel::get_index(const Symbol symbol) const
- {
- if( symbol < symbol_lwb || symbol > symbol_upb )
- _error("Symbol %d is out of interval [%d,%d]",
- symbol, symbol_lwb, symbol_upb);
- Index index = symbol_to_index[symbol-symbol_lwb];
- if( index == 0 )
- _error("\nSymbol %d has not been counted when Histogram was constructed",
- symbol);
- return index;
- }
-
- // Return the symbol for a given index
- Symbol AdaptiveHistModel::get_symbol(const Index index) const
- {
- if( index < 1 || index > no_indices || index == EOF_index )
- _error("Index %d is out of boundaries [1,%d)",index,no_indices);
- return index_to_symbol[index-1];
- }
-
- /*
- *------------------------------------------------------------------------
- * Input/Output for the frequency tables
- * As a matter of fact, only index_to_symbol[] table along with its size
- * needs to be saved. All other tables can easily be reconstructed from that.
- *
- * Thus, the format the tables are written is follows
- * short no_distinct_symbols
- * short index_to_symbol[0..no_distinct_symbols-1]
- * The tables entries are coded using a variable bit size algorithm,
- * see BitIn/BitOut for more details.
- */
-
- void AdaptiveHistModel::open(BitOut& file)
- {
- file.write_short(no_distinct_symbols);
- for(register int i=0; i<no_distinct_symbols; i++)
- file.put_short(index_to_symbol[i]);
- }
-
- // Read in the tables and set up
- // the model
- void AdaptiveHistModel::open(BitIn& file)
- {
- no_distinct_symbols = file.read_short("Reading AdaptiveHistModel tables");
- allocate_model(no_distinct_symbols);
- index_to_symbol = new Symbol[no_distinct_symbols];
-
- for(register int i=0; i<no_distinct_symbols; i++)
- {
- const Symbol symbol = file.get_short();
- if( i== 0 )
- symbol_lwb = symbol_upb = symbol;
- else
- symbol_lwb = symbol_lwb > symbol ? symbol : symbol_lwb,
- symbol_upb = symbol_upb < symbol ? symbol : symbol_upb;
- index_to_symbol[i] = symbol;
- }
-
- no_potential_symbols = symbol_upb - symbol_lwb + 1;
- assert( no_potential_symbols > 1 );
- assert( no_distinct_symbols <= no_potential_symbols );
-
- symbol_to_index = new Index[no_potential_symbols];
-
- memset(symbol_to_index,0,sizeof(symbol_to_index[0])*no_potential_symbols);
- for(register Index index=1; index<=no_distinct_symbols; index++)
- {
- Symbol symbol = index_to_symbol[index-1];
- symbol_to_index[symbol-symbol_lwb] = index;
- }
-
- initial_distribution();
- }
-
- /*
- *------------------------------------------------------------------------
- * Update the adaptive model
- * to account for occurence of a particular index
- */
-
- // Scale the accumulated statistics down
- // in anticipation of a change, or simply to
- // keep counters from overflow
- void AdaptiveHistModel::scale_down_past(void)
- {
- register int cum = 0; // If so, halve all the counts
- for(register int i=no_indices; i>=0; i--)
- { // keeping them nonzero, and
- cum_frequencies[i] = cum; // recompute the cumulative counts
- cum += (frequencies[i] = (frequencies[i]+1) >> 1);
- }
- }
-
- void AdaptiveHistModel::update_model(const Index index)
- {
- // See if freq counts near their max
- if( total_cum_freq() >= (Top_freq>>1) ) // /2 makes rescaling work more often
- scale_down_past();
- // Tentatively increment the freq
- // count for index, just an estimate
- const Lword tent_freq = frequencies[index] +
- (update_inc > 4 ? update_inc : 1);
-
- // Try to pull 'index' to the beginning
- // of the freq. table, skipping symbols
- // with the lower freq.
- register Lword * fp = &frequencies[index];
- while( *--fp != 0 && *fp <= tent_freq ) // Note, freq[0] is always 0
- ;
- // Find a new position for the index
- const Index new_index = ++fp - frequencies;
- if( new_index < index ) // If the index is to move, update
- { // the translation tables
- Symbol symbol_oind = index_to_symbol[index-1];
- Symbol symbol_nind = index_to_symbol[new_index-1];
- index_to_symbol[index-1] = symbol_nind;
- index_to_symbol[new_index-1] = symbol_oind;
- symbol_to_index[symbol_oind-symbol_lwb] = new_index;
- symbol_to_index[symbol_nind-symbol_lwb] = index;
-
- // Note, we've just swapped index
- // and new_index. If their frequencies
- // differ, we need to update the
- // frequecies and cumulative freqs
- const Lword old_freq = frequencies[index];
- const Lword new_freq = frequencies[new_index];
- if( old_freq != new_freq )
- {
- frequencies[index] = new_freq;
- frequencies[new_index] = old_freq;
- const long int delta = new_freq - old_freq;
- for(register Lword * cfp = &cum_frequencies[new_index];
- cfp < &cum_frequencies[index];)
- *cfp++ += delta;
- }
- }
-
- frequencies[new_index] += update_inc; // Increment the freq count for index
- register Lword * cfp; // And update the cumulative counts
- for(cfp=&cum_frequencies[new_index]; cfp>&cum_frequencies[0]; )
- *--cfp += update_inc;
-
- #if 0
- {
- message("index %d, new_index %d, update_inc %d\n",index,new_index,update_inc);
- for(register Index i=0; i<no_indices; i++)
- message("%d ",frequencies[i]);
- message("\n");
- }
- #endif
- }
-