I could have easily built something into the program I was writing, but I thought something more general-purpose would be useful, so I quickly wrote the following code. It tallies each character in a file, sorts the tallies with quicksort, and then prints the results. I even added a switch to ignore whitespace characters.
/*
 * Author: James Church
 * Date: 09/11/06
 * Program: charfreq
 * Version: 0.3
 * Description: Takes a single file from the command line as input and
 *              reports the character frequency of each character in order
 *              from most common to least common.
 *
 * Copyright © 2006 Free Software Foundation, Inc.
 *
 * Copying and distribution of this file, with or without modification,
 * are permitted in any medium without royalty provided the copyright
 * notice and this notice are preserved.
 */

#include <stdio.h>
#include <stdlib.h>
#include <string.h>

/* Number of distinct values an unsigned char can take on this tool's targets. */
#define CHARRANGE 256

/* One tally entry: a byte value and how many times it occurred. */
typedef struct _charcount {
    unsigned char c;
    long count;
} CharCount;

void quicksort (CharCount *a, int i, int j);
int partition (CharCount *a, int i, int j);

/* Prints the command-line synopsis to standard output. */
static void print_usage(const char *progname)
{
    printf("\nUsage: %s [-s] [filename] - Reports character frequence of file.\n", progname);
    printf(" -s Ignores whitespace (optional)\n");
}

/*
 * Entry point.
 *
 * Usage: charfreq [-s] filename
 *   -s   do not count ' ', '\t', '\r' and '\n'
 *
 * Counts every byte of the file, sorts the tallies and prints one line per
 * byte value that occurred, most frequent first.
 *
 * Returns 0 on success (or when invoked without arguments, after printing
 * the usage text) and EXIT_FAILURE when the filename is missing, the file
 * cannot be opened, or a read error occurs.
 */
int main(int argc, char **argv)
{
    const char *progname = (argc > 0 && argv[0] != NULL) ? argv[0] : "charfreq";
    CharCount freq[CHARRANGE];
    FILE *file;
    int ignore_whitespace = 0;
    int argparse = 1;
    int read_failed;
    int ch;
    int i;

    if (argc < 2) {
        print_usage(progname);
        return 0;
    }

    if (strcmp("-s", argv[argparse]) == 0) {
        ignore_whitespace = 1;
        argparse++;
    }

    /* "-s" alone leaves no filename; argv[argparse] would be NULL here. */
    if (argparse >= argc) {
        print_usage(progname);
        return EXIT_FAILURE;
    }

    /* Binary mode: the tally is per byte, so no newline translation is wanted. */
    if ((file = fopen(argv[argparse], "rb")) == NULL) {
        fprintf(stderr, "Error: Cannot open %s\n", argv[argparse]);
        return EXIT_FAILURE;
    }

    for (i = 0; i < CHARRANGE; i++) {
        freq[i].c = (unsigned char) i;
        freq[i].count = 0;
    }

    /* getc() yields each byte as an unsigned char value, or EOF at end/error. */
    while ((ch = getc(file)) != EOF) {
        if (ignore_whitespace &&
            (ch == ' ' || ch == '\t' || ch == '\r' || ch == '\n'))
            continue;
        freq[ch].count++;
    }

    read_failed = ferror(file);
    fclose(file);
    if (read_failed) {
        fprintf(stderr, "Error: Cannot read %s\n", argv[argparse]);
        return EXIT_FAILURE;
    }

    quicksort(freq, 0, CHARRANGE - 1);

    /*
     * The array is sorted ascending by count, so walk it backwards to list
     * the most common byte first and stop at the first byte never seen.
     */
    for (i = CHARRANGE - 1; i >= 0; i--) {
        if (freq[i].count == 0)
            break;
        printf("%c[%3d]: %ld\n", freq[i].c, freq[i].c, freq[i].count);
    }

    return 0;
} /* End main */

/*
 * Sorts a[i..j] (inclusive bounds) into ascending order of count.
 * Does nothing when the range holds fewer than two elements.
 * Entries with equal counts may end up in any relative order.
 */
void quicksort (CharCount *a, int i, int j)
{
    int p;

    if (i < j) {
        p = partition(a, i, j);
        quicksort(a, i, p - 1);
        quicksort(a, p + 1, j);
    }
} /* End quicksort */

/*
 * Partitions a[i..j] around the count of a[i] (the pivot).
 *
 * On return, every entry left of the returned index has a count smaller
 * than the pivot's and every entry right of it has a count greater than
 * or equal to it; the pivot itself sits at the returned index.
 */
int partition (CharCount *a, int i, int j)
{
    long val = a[i].count;  /* same type as the field, so no truncation */
    int h = i;              /* last index of the "smaller than pivot" run */
    int k;
    CharCount temp;

    for (k = i + 1; k <= j; k++) {
        if (a[k].count < val) {
            h++;
            temp = a[h];
            a[h] = a[k];
            a[k] = temp;
        }
    }

    /* Drop the pivot between the two runs. */
    temp = a[i];
    a[i] = a[h];
    a[h] = temp;

    return h;
} /* End partition */
Enjoy!
No comments:
Post a Comment