#include "huffman.h"
  // Roger Boerdijk 30.06.2002
// email: kitt3n@home.nl                                              
// Use the code any way you wish; you assume all responsibility for its 
// correctness and suitability should you use it. 
//-------------------------------------------------------------------------------------------------
int cHuffmanNode::operator () (const cHuffmanNode& a, const cHuffmanNode& b) const 
{ return a.frequency>b.frequency;
}
  int cHuffmanNode::operator () (const cHuffmanNode* a, const cHuffmanNode* b) const 
{ return a->frequency>b->frequency;
}
  //-------------------------------------------------------------------------------------------------
cHuffmanTree::cHuffmanTree (U32 frequency_table[256]) : root(NULL)
{ 
  // Constructor for a byte-frequency table - usefull for compressing files
  ft.resize (256);
  for (int i=0; i<256; i++)
  {
    ft[i].first = i;
    ft[i].second=frequency_table[i];
  }
  buildTreeDo (ft);
}
  cHuffmanTree::cHuffmanTree (std::vector<pair<U8, U32> > frequency_table) : root (NULL)
{ 
  // more generic frequency table, can contain more/less ch's with
  // respective frequencies than the byte-frequency-table. 
  // Nice if you have a limited character-set, for example only 
  // A-Z and 0-9, instead of the entire ascii-table
  ft = frequency_table;
  buildTreeDo (ft);
}
  cHuffmanTree::cHuffmanTree (U8* data, U32 size) : root (NULL)
{ 
  // very simular to cHuffmanTree::buildTree (U32 frequency_table[256])
  // only this function will build the frequency-table itself
  pair<U8, U32> thePair;
  ft.resize (256);
  for (int z=0; z<ft.size(); z++)
  {
    thePair.first = z;
    thePair.second = 0; 
    ft[z]=thePair;
  }
    // count frequencies
  for (int i=0; i<size; i++)
    ft[data[i]].second++;
    buildTreeDo (ft);
}
  cHuffmanTree ::~cHuffmanTree ()
{
  freeTree ();
}
  //-------------------------------------------------------------------------------------------------
bool cHuffmanTree::buildTreeDo (const std::vector<pair<U8, U32> > frequency_table)
{
  // If there was already a tree, cleanup and build the new one
  if (root) { freeTree (); }
    ft = frequency_table;
  
  std::priority_queue<cHuffmanNode*, std::vector<cHuffmanNode*>, cHuffmanNode> h;
  cHuffmanNode* t = NULL;
    // 1. Fill a heap with characters sorted on frequency
  //    First in heap character with highest frequency
  for (int i=0; i<frequency_table.size(); i++)
  { 
    // We push if the symbol has at least 1 occurance,
    // if the symbol has 0 occurances it's not usefull
    // to put it in the tree. 
    if (frequency_table[i].second!=0)
    {
      cHuffmanNode* tmp = new cHuffmanNode;
      assert (tmp && "cHuffman::buildTree() Failed!");
      tmp->ch        = frequency_table[i].first;
      tmp->frequency = frequency_table[i].second;
      h.push (tmp);
    }
  }
    // 2. Build the tree from the heap
  while (!h.empty())
  {
    if (h.size()==1)
    {
      // If h contains only one character X, make X the root of T
      t = h.top();
      h.pop();
    }
    else
    {
      // Pick two characters X and Y with the lowest frequencies
      // and delete them from H
      cHuffmanNode* X=h.top(); h.pop();
      cHuffmanNode* Y=h.top(); h.pop();
      // replace X and Y with a new character Z, whose frequency
      // is the sum of the frequencies of X and Y
      cHuffmanNode* Z = new cHuffmanNode;
      Z->frequency   = X->frequency+Y->frequency;
      Z->left_child  = X;
      Z->right_child = Y;
      h.push (Z);
    }
  }
    root = t;
//buildCode (frequency_table, t)
  return true;
}
  //-------------------------------------------------------------------------------------------------
void cHuffmanTree::freeTree (void)
{
  if (root)
  {
    freeTreeR (root);
    root=NULL;
  }
}
  void cHuffmanTree::freeTreeR (cHuffmanNode* node)
{
  if (!node) return;
    if (!(node->right_child && node->left_child))
  {
    delete node;
    return;
  }
    if (node->left_child)
    freeTreeR (node->left_child);
  if (node->right_child)
    freeTreeR (node->right_child);
    // we deleted all of this node's children, now delete
  // the node itself
  delete node;
}
  //-------------------------------------------------------------------------------------------------
U32  cHuffmanTree::getTree (std::string& theTree)
{
  return getTreeR (theTree, root);
}
  U32 cHuffmanTree::getTreeR (std::string& theTree, cHuffmanNode* node)
{
  // This function stores reconstruction-data for our tree
  // Note that the "1" and "0" do NOT represent huffman-codes
  // but it tells if a node is a branch or a leaf
  // Note that we don't store frequency data for reconstruction!
  if (!node) return 0; 
    if (!(node->left_child && node->right_child))
  { // leaf
    char ch = node->ch;
    theTree.append ("1");
    // note: will give problems with >256 frequency-tables!
    theTree+=ch; 
    return 2;
  }
  else
  { // Branch
    theTree.append ("0");
    return 1+
      getTreeR (theTree, node->left_child)+
      getTreeR (theTree, node->right_child);
  }
    std::vector<std::string> bs;
  getStringTable (bs);
}
  void cHuffmanTree::setTree (const std::string& theTree)
{
  if (root)
    freeTree ();
    U32 index=0;
  root = setTreeR (theTree, index);
}
  cHuffmanNode* cHuffmanTree::setTreeR (const std::string& theTree, U32& idx)
{
  // This function reconstructs our tree
  // Note that we don't reconstruct frequency data!
  if (idx>=theTree.size())
    return NULL;
    cHuffmanNode* n = new cHuffmanNode; 
  U8 b = theTree[idx];
    if (b == '0') 
  { // branch
    n->ch          = 0; 
    n->left_child  = setTreeR (theTree, ++idx); 
    n->right_child = setTreeR (theTree, ++idx); 
  }
  else        
  { // leaf
    idx++;
    n->ch          = theTree[idx];
    n->left_child  = NULL;
    n->right_child = NULL;
  }
  return n; 
}
  //-------------------------------------------------------------------------------------------------
bool cHuffmanTree::gcp(char* s, cHuffmanNode* n, int c)
{
  // get-character-path worker function.
  if (!n) return 0; 
  if (!(n->left_child && n->right_child))
  {
    if (c == n->ch)
      return true;
    else
      return false;
  }
  else
  {
    char _s[256];
    if (gcp(s, n->left_child, c))
    {
      strcpy(_s, s);
      strcpy(s, "0");
      strcat(s, _s);
      return true;
    }
    else if (gcp(s, n->right_child, c))
    {
      strcpy(_s, s);
      strcpy(s, "1");
      strcat(s, _s);
      return true;
    }
    else
      return false;
  }
}
  
char* cHuffmanTree::getCharacterPath (cHuffmanNode* n, int c)
{
  // Get the path through the tree to find the bit-string
  // we need to encode / decode a character. Note that
  // we are using a static-member ie NOT an example of 
  // good programming!
  static char string[256];
  strcpy(string, "");
  if (gcp(string, n, c))
    return string;
  else
    return "";
}
  void cHuffmanTree::getStringTable (std::vector<std::string>& bitString)
{
  // Example function which will get all string encodings of each character.
  // Note that '0' is a 0-bit and '1' is a 1-bit
  bitString.resize (ft.size());
  for (int i=0; i<ft.size(); i++)
  {
    char* tmp = getCharacterPath (root, ft[i].first); 
    bitString[i]=tmp;
  }
}
  //-------------------------------------------------------------------------------------------------
  bool cHuffmanTree::write_bit (bool bit, U8& theByte, bool reset)
{
  // grow a byte, bit by bit, and then flush it theByte reset will 
  // initialize the function to default-state. will return true as 
  // soon as 8 bits were written and it is up to the called to save
  // the value in theByte.
  // note that if the function returns false, theByte does not 
  // contain any usefull value
  static long bit_index = 7;
  static U8   byte = 0x00;
    bool flush = false;
    if (reset) 
  { bit_index=7; 
    byte = 0x00;
    return false; 
  } 
    if(bit_index == -1) 
  {
    // flush byte
    bit_index = 7;
    byte = 0x00;
  }
  //byte |= (bit ? 1 : 0) << bit_index;
  byte |= bit << bit_index;
  bit_index--;
  
  if(bit_index == -1) 
  { 
    theByte = byte; 
    flush=true;
  }
  return flush;
}
  bool cHuffmanTree::read_bit(bool& bit, U8 byte, bool reset)
{
  // return one bit at a time, returns true when the 
  // byte finished and it wants the next byte
  static long bit_index = 7;
  bool requestNextByte=false;
  int bit_value;
    if (reset)
  { bit_index = 7;
    requestNextByte=false;
    return true;
  }
    // bit_value = (byte & (1 << bit_index)) ? 1 : 0;
  bit_value = byte & (1 << bit_index);
  bit       = bit_value!=0;
  bit_index--;
    if(bit_index == -1)
  {
    requestNextByte=true;
    bit_index = 7;
  }
  
  return requestNextByte;
}
  U32 cHuffmanTree::getCompressedSize (U8* udata, U32 len)
{
  assert (root && udata);
    // get stringtable with compression info
  int i;
  std::vector<std::string> bs;
  getStringTable (bs);
    // calculate datasize
  U32 bitsEstimated=0;
  U32 byteEstimated=0;
  for (i=0; i<len; i++)
  { 
    bitsEstimated+=bs[udata[i]].length();
    assert (bs[udata[i]].length()!=0 && "cHuffmanTree::compress() Unknown ch");
  }
    // 4 bytes (U32) extra for uncompressed size
  byteEstimated=bitsEstimated/8; //+bitsEstimated%8==0?0:1+4;
  byteEstimated+=bitsEstimated%8==0?0:1;
  byteEstimated+=4;
    return byteEstimated;
}
  
//-------------------------------------------------------------------------------------------------
U32 cHuffmanTree::compress (U8* udata, U32 len, U8** cdata)
{
  // NOTE: Client has to deallocate memory which we allocate here!
  //       (maybe we can somehow change this)
  assert (root);
    // get stringtable with compression info
  int i;
  std::vector<std::string> bs;
  getStringTable (bs);
    U32 byteEstimated = getCompressedSize (udata, len);
    U8* data = new U8[byteEstimated]; 
  if (data)
  {
    *cdata = data;
    *((U32*)data)=len; 
    data+=4;
      // compress data
    U8 cbyte;
    write_bit (false, cbyte, true);  // reset
    for (i=0; i<len; i++) 
    {
      U32 bitlen = bs[udata[i]].length();
      const I8* str = bs[udata[i]].c_str();
        for (int j=0; j<bitlen; j++) 
      {
        // write the correct bit into our temp-buffer
        if (write_bit(((str[j] - '0')!=0), cbyte))
        {
          // we wrote 8 bits, copy to our compressed buffer
          // and prepare for next byte
          *data=cbyte;
          data++;
        }
      }
    }
      // fill last byte to end with 0
    for (int fillbit=0; fillbit<8; fillbit++) 
    {
      if (write_bit(0, cbyte)) 
      { *data=cbyte; 
        data++; 
        break;
      } 
    }
    return byteEstimated;
  }
  return 0; 
}
  
U32 cHuffmanTree::uncompress (U8* cdata, U32 len, U8** udata)
{
  // NOTE: Client has to deallocate memory which we allocate here!
  //       (maybe we can somehow change this)
  U32 ulen = *((U32*)cdata);
  cdata+=4;
    U8* data = new U8[ulen]; 
  if (data)
  {
    *udata=data;
      bool bit=0;
    cHuffmanNode* curnode = root;
    U32 decodedU8s=0;
      // make sure our readbit is in initialized state
    read_bit (bit, cdata[0], true);
      while (decodedU8s<ulen)
    {    
      if (cHuffmanTree::read_bit(bit, cdata[0]))
      { // next byte!
        cdata++;
      }
        if (bit==0)
		  {	curnode = curnode->left_child;
		  }
      else
		  {	curnode = curnode->right_child;
		  }
        // if we're at a leaf, output the character and start over
      if (!(curnode->left_child && curnode->right_child))
      {	
        data[decodedU8s]=(curnode->ch);
        decodedU8s++;
        curnode = root;
      }
    }
    return ulen;
  }
  return 0;
}
  //-------------------------------------------------------------------------------------------------
U32 cHuffmanTree::compress (const I8* inname, const I8* outfile)
{
  bool success=false;
  FILE* fp, *fpout;
  if ((fp=fopen((char*)inname, "rb"))!=NULL)
  {
    if ((fpout=fopen((char*)outfile, "wb"))!=NULL)
    {
      // determine filesize
      fseek (fp, 0, SEEK_END);
      U32 len = ftell (fp);
      fseek (fp, 0, SEEK_SET);
        // read entire file into ram
      U8* ram  = new U8[len];
      U8* zip;
      if (ram)
      { fread (ram, len, 1, fp);
      }
        // build huffman tree & compress
      cHuffmanTree tree2 (ram, len);
      U32 csize = tree2.compress (ram , len, &zip);
        std::string htR;
      tree2.getTree (htR);
      U16 htS = htR.size();
      U8  htC;
        fwrite (&htS, sizeof (htS), 1, fpout);
      for (int i=0; i<htR.size(); i++)
      { htC = htR[i];
        fwrite (&htC, 1, 1, fpout);
      }
      fwrite (zip, csize, 1, fpout);
        delete zip;
      delete ram;
        fclose (fpout);
      success=true;
    }
    fclose (fp);
  }
  return success;
}
  U32 cHuffmanTree::uncompress (const I8* inname, const I8* outfile)
{
  bool success=false;
  FILE* fp, *fpout;
  if ((fp=fopen((char*)inname, "rb"))!=NULL)
  {
    if ((fpout=fopen((char*)outfile, "wb"))!=NULL)
    {
      // determine filesize
      fseek (fp, 0, SEEK_END);
      U32 len = ftell (fp);
      fseek (fp, 0, SEEK_SET);
        // read entire file into ram
      U8* zip  = new U8[len];
      if (zip)
      { fread (zip, len, 1, fp);
      }
        // build huffman tree & decompress
      U16 htS = *(U16*)(zip);
      std::string htR;
      htR.resize (htS);
      for (int i=0; i<htS; i++)
      { htR[i]=*(zip+i+2);
      }
      U32 htSize = sizeof(htS)+htR.size()*sizeof(U8);
        U8* ram;
      cHuffmanTree tree2;
      tree2.setTree (htR);
      U32 usize = tree2.uncompress (zip+htSize, len-htSize, &ram);
        fwrite (ram, usize, 1, fpout);
      delete zip;
      delete ram;
        fclose (fpout);
      success=true;
    }
    fclose (fp);
  }
  return success;
}
  
   |