/* Byte (and bit) occurrence frequency counter.
   Author: Michal Guerquin
   Date: June 2005
*/

#include <stdio.h>
#include <stdlib.h>
#include <string.h>


/* derive a per-bit histogram from a per-byte histogram

   byte_histogram must hold 256 counters, indexed by byte value.
   out_hist must hold 8 counters, indexed by bit position, where
   position j corresponds to the mask 1<<j.

   for each bit position, the counters of every byte value that has
   that bit set are added onto out_hist[position]. the counters in
   out_hist are accumulated into, not cleared, so the caller is
   expected to zero them beforehand.
 */
void bit_histogram(int * byte_histogram, int * out_hist)
{
  int bit;
  int value;

  /* walk the bit positions one at a time ... */
  for (bit = 0; bit < 8; bit++)
  {
    /* ... and credit each one with every byte value that uses it */
    for (value = 0; value < 256; value++)
    {
      if ((value >> bit) & 1)
        out_hist[bit] += byte_histogram[value];
    }
  }
}


/* make histogram from an open stream

   fd must be a stream opened for reading (binary mode is the
   sensible choice). if fd is NULL nothing is read and out_hist
   is left untouched.

   out_hist must be preallocated to size 256.
   out_hist[i] is incremented once for every byte of value i
   read from the stream. counters are accumulated into, not
   cleared, so several streams can be summed into one histogram.

   the stream is read until end-of-file or the first read error,
   and is then CLOSED by this function; the caller must not
   fclose() it again. */
void mkhist(FILE * fd, int * out_hist)
{
  unsigned char chunk[4096];
  size_t got;
  size_t k;

  if (fd == NULL) return;

  /* loop on what fread actually delivered rather than on feof():
     after a read error feof() stays false forever, so testing it
     alone would spin endlessly. fread returns 0 on both
     end-of-file and error, which ends the loop either way. */
  while ((got = fread(chunk, 1, sizeof chunk, fd)) > 0)
  {
    /* for every byte read, bump the matching counter */
    for (k = 0; k < got; k++)
      out_hist[chunk[k]] += 1;
  }

  /* close file */
  fclose(fd);
}


/* display contents of a histogram to stdout.

   hist must hold n elements; if hist is NULL nothing is
   printed. output format is, per line:

     <count> <index> [<char>]

   <count> is right-aligned to the width of the widest count
   in hist (including a minus sign, should a count be
   negative). <index> is right-aligned to 3 columns.

   - if printchars is nonzero, then the ASCII character of
   element indexes 33 to 126 (inclusive) is displayed
   too. if 0, it is omitted.

   - if printzeros is nonzero, then lines where <count> is
   zero are displayed. if 0, they are omitted. */
void printhist(int * hist, int n, int printchars, int printzeros)
{
  int i;
  int width = 0;

  if (hist == NULL) return;

  /* find the widest counter that will be displayed
     (for alignment purposes). the digits are counted
     by repeated division so any int value is handled. */
  for (i = 0; i < n; i++)
  {
    int rest = hist[i];
    int len = (rest < 0) ? 1 : 0;   /* room for the minus sign */

    do
    {
      len++;
      rest /= 10;
    } while (rest != 0);

    if (len > width) width = len;
  }

  /* for every element in hist */
  for (i = 0; i < n; i++)
  {
    /* omit this element? */
    if (!printzeros && hist[i] == 0)
      continue;

    /* print straight to stdout; no intermediate description
       buffer is needed, so there is nothing to overflow. the
       '*' takes the column width from the argument list. */
    if (printchars && i >= 33 && i <= 126)
      printf("%*d %3d %c\n", width, hist[i], i, i);
    else
      printf("%*d %3d\n", width, hist[i], i);
  }
}

/* print the command-line synopsis and the list of flags to stdout.

   programname is substituted into the "Usage:" line; it is
   printed with %s, so it must be a valid string. */
void usage(char * programname)
{
  /* everything after the synopsis line is fixed text */
  static const char * const body[] = {
    "\n",
    "Counts and displays bit and byte occurances in one or more files.\n",
    "\n",
    "  -bytes    omit byte counts.\n",
    "  +bits     display bit counts.\n",
    "  -zeros    omit count=0 entries.\n",
    "  +v        verbose.\n",
    "\n"
  };
  size_t row;

  printf("Usage: %s [-bytes] [+bits] [-zeros] [+v] file ...\n", programname);

  for (row = 0; row < sizeof body / sizeof body[0]; row++)
    fputs(body[row], stdout);
}

/* program entry point.

   parses the command line (flags -zeros, +bits, -bytes, +v; every
   other argument is a filename), accumulates one byte histogram
   over all named files, then prints the byte histogram and/or the
   bit histogram derived from it to stdout.

   returns EXIT_SUCCESS once the requested histograms have been
   printed, and EXIT_FAILURE on a usage error, an allocation
   failure, or a file that cannot be opened. */
int main(int argc, char *argv[])
{
  static char fallback_name[] = "<program>";            /* used when argv[0] is unavailable */
  int hist_bytes[256] = {0};                            /* histogram of bytecounts, starts zeroed */
  int hist_bits[8] = {0};                               /* histogram of bitcounts, starts zeroed */
  FILE * fd;                                            /* stream of the file being examined */
  int i;                                                /* generic counter */
  int opt_zeros = 1;                                    /* option defaults */
  int opt_bits = 0;
  int opt_bytes = 1;
  int opt_verbose = 0;
  char ** filename = NULL;                              /* list of filenames to process */
  int filename_count = 0;                               /* # of filenames to process */
  int status = EXIT_FAILURE;                            /* pessimistic until all work is done */
  char * programname;                                   /* name shown in the usage message */

  /* argv[0] may be missing when argc is 0; never hand NULL to printf's %s */
  programname = (argc > 0 && argv[0] != NULL) ? argv[0] : fallback_name;

  if (argc < 2)
  {
    fprintf(stderr, "ERR: At least one argument expected.\n");
    usage(programname);
    return EXIT_FAILURE;
  }

  /* allocate enough memory for as many filenames as there are arguments */
  filename = malloc(sizeof *filename * (size_t)argc);
  if (filename == NULL)
  {
    fprintf(stderr, "ERR: Out of memory.\n");
    return EXIT_FAILURE;
  }

  /* analyze arguments */
  for (i=1;i<argc;i++)
  {
    /* if the argument is 2 or more chars long, and begins with - or + */
    if (strlen(argv[i])>1 && (argv[i][0]=='-' || argv[i][0]=='+'))
    {
      /* set appropriate options */
           if (strcmp(argv[i], "-zeros")==0) opt_zeros = 0;
      else if (strcmp(argv[i], "+bits")==0)  opt_bits  = 1;
      else if (strcmp(argv[i], "-bytes")==0) opt_bytes = 0;
      else if (strcmp(argv[i], "+v")==0) opt_verbose = 1;
      else
      {
        fprintf(stderr, "ERR: Invalid flag, %s.\n", argv[i]);
        usage(programname);
        goto done;
      }
    }
    /* if it's not an option, then it's a filename */
    else
    {
      /* make sure the file exists before proceeding
         test by opening and closing it */
      fd = fopen(argv[i], "rb");
      if (fd == NULL)
      {
        fprintf(stderr, "ERR: Cannot open file, %s.\n", argv[i]);
        usage(programname);
        goto done;
      }
      fclose(fd);
      filename[filename_count] = argv[i];
      filename_count += 1;
    }
  }

  if (filename_count == 0)
  {
    fprintf(stderr, "ERR: At least one filename expected.\n");
    usage(programname);
    goto done;
  }

  /* if we're verbose, print out options and filenames specified */
  if (opt_verbose)
  {
    printf("opt_verbose %d\n", opt_verbose);
    printf("opt_zeros %d\n", opt_zeros);
    printf("opt_bits %d\n", opt_bits);
    printf("opt_bytes %d\n", opt_bytes);
    printf("filename_count %d\n", filename_count);
    for (i=0; i<filename_count; i++)
    {
      printf("filename%d %s\n", i, filename[i]);
    }
  }

  /* if there's nothing to display, then there's nothing to do */
  if (!opt_bytes && !opt_bits)
  {
    fprintf(stderr, "ERR: There is nothing to do.\n");
    usage(programname);
    goto done;
  }

  /* for every file */
  for (i=0; i<filename_count; i++)
  {
    /* process it and accumulate byte occurrences in hist_bytes */
    if (opt_verbose) printf("processing file %s\n", filename[i]);
    fd = fopen(filename[i], "rb");
    /* the file was openable during argument parsing, but it may
       have disappeared or changed permissions since then */
    if (fd == NULL)
    {
      fprintf(stderr, "ERR: Cannot open file, %s.\n", filename[i]);
      goto done;
    }
    /* mkhist closes the stream itself once it has been read, so it
       must not be closed a second time here */
    mkhist(fd, hist_bytes);
  }

  /* if need to display bytes */
  if (opt_bytes)
    printhist(hist_bytes, 256, 1, opt_zeros);

  /* if need to display bits */
  if (opt_bits)
  {
    bit_histogram(hist_bytes, hist_bits);
    printhist(hist_bits, 8, 0, opt_zeros);
  }

  status = EXIT_SUCCESS;

done:
  /* single exit path so the filename list is always released */
  free(filename);
  return status;
}



