I could have easily built something into the program I was writing, but I thought something more general purpose would be useful. I quickly wrote the following code. It takes a tally of each character in a file and then uses a quick sort to sort the data and then prints the results. I even added a switch to ignore whitespace characters.
#include <stdio.h>
/* Author: James Church
* Date: 09/11/06
* Program: charfreq
* Version: 0.3
* Description: Takes a single file from the command line as input and
* reports the character frequence of each character in order
* from most common to least common.
*
* Copyright © 2006 Free Software Foundation, Inc.
*
* Copying and distribution of this file, with or without modification,
* are permitted in any medium without royalty provided the copyright
* notice and this notice are preserved.
*/
#define CHARRANGE 256
typedef struct _charcount {
unsigned char c;
long count;
} CharCount;
void quicksort (CharCount *a, int i, int j);
int partition (CharCount *a, int i, int j);
int main(int argc, char **argv) {
int i;
unsigned char c;
CharCount freq[CHARRANGE];
FILE *file;
int ignore_whitespace = 0;
int argparse = 1;
if (argc == 1) {
printf("\nUsage: %s [-s] [filename] - Reports character frequence of file.\n", argv[0]);
printf(" -s Ignores whitespace (optional)\n");
return 0;
} // End If
if (strcmp("-s", argv[argparse]) == 0) {
ignore_whitespace = 1;
argparse++;
} // End If
if ((file = fopen(argv[argparse], "r")) == NULL) {
printf("Error: Cannot open %s\n", argv[1]);
return 0;
} // End If
argparse++;
for (i = 0; i < CHARRANGE; i++) {
freq[i].c = (unsigned char) i;
freq[i].count = 0;
} // End For
while (1) {
fread (&c, sizeof(unsigned char), 1, file);
if (feof(file)) break;
if (ignore_whitespace && (c == ' ' || c == '\t' || c == '\r' || c == '\n'))
continue;
freq[c].count++;
} // End While
quicksort(freq, 0, CHARRANGE-1);
for (i = CHARRANGE-1; i >= 0; i--) {
if (freq[i].count == 0) break;
printf("%c[%3d]: %d\n", freq[i].c, freq[i].c, freq[i].count);
} // End For
fclose(file);
return 0;
} // End Main
void quicksort (CharCount *a, int i, int j) {
int p;
if (i < j) {
p = partition (a, i, j);
quicksort (a, i, p-1);
quicksort (a, p+1, j);
} /* End If */
} /* End mergesort */
int partition (CharCount *a, int i, int j) {
int val = a[i].count;
int h = i;
int k;
CharCount temp;
for (k = i+1; k <= j; k++)
if (a[k].count < val) {
h++;
temp = a[h];
a[h] = a[k];
a[k] = temp;
} /* End If */
temp = a[i];
a[i] = a[h];
a[h] = temp;
return h;
} /* End partition */ |
Enjoy!

No comments:
Post a Comment