Previous: , Up: The Implementation of Functionality of STL map Template   [Contents][Index]


6.5.4 Example of Using a Mapping Object

In this example of using a mapping object, the program counts the frequencies of words in a text and prints a table of word frequencies in descending order of frequency. The words whose frequencies the program counts are sequences of alphanumeric characters. All other characters are treated as word delimiters.

The program should be invoked with an argument that specifies the name of a file containing the input text. An optional second argument should be a positive integer that specifies how many of the most frequent words to print in the resulting frequency table.

The source code of the example is provided in the file samples/fqtab.c in the package distribution and is also given below.

#include <ctype.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>

#include <qsmm/map.h>


// Print a printf-style diagnostic message followed by a newline to stderr and
// jump to the `Exit' label of the enclosing function.  The enclosing function
// must therefore define that label.
#define ERREXIT(fmt, ...)                       \
    do {                                        \
        fprintf(stderr,(fmt), ## __VA_ARGS__);  \
        fputc('\n',stderr);                     \
        goto Exit;                              \
    }                                           \
    while (0)


// One row of the output frequency table built in main().
struct word_fq_s {
    char *word;  // the word; main() points this at a key stored in the map
    int  fq;     // number of occurrences of the word
};


// Mapping from a word (key) to its occurrence count (int value) and
// an iterator over that mapping, shared by word_add() and main().  Both are
// created in main() and destroyed at its `Exit' label.
static qsmm_iter_t iter_word_to_fq=0;
static qsmm_map_t map_word_to_fq=0;


// Register one occurrence of `word' in the word-to-frequency map.
//
// If the word is not in the map yet, a heap copy of it is inserted as a new
// key; the copy is owned by the map entry from then on.  The counter stored
// as the entry value is then incremented.
// NOTE(review): the increment relies on qsmm_map_insert() zero-initializing
// the value when a null value pointer is passed -- confirm against the QSMM
// documentation.
//
// Returns 0 on success or -1 if the copy could not be allocated or the
// insertion failed.
static int word_add(const char *word) {
    int *fq_p;
    qsmm_map_find(map_word_to_fq,word,iter_word_to_fq);
    if (qsmm_map_iter_is_end(map_word_to_fq,iter_word_to_fq)) {
        // First occurrence: the map needs its own copy of the key because
        // the caller reuses its word buffer.
        char *word_copy=strdup(word);
        if (!word_copy) return -1;
        if (qsmm_map_insert(map_word_to_fq,word_copy,0,iter_word_to_fq)<0) {
            free(word_copy);
            return -1;
        }
    }
    // The iterator now refers to the entry for `word', found or just inserted.
    fq_p=(int *) qsmm_map_iter_val(iter_word_to_fq);
    ++*fq_p;
    return 0;
}


// Key comparison callback passed to the word map: orders two NUL-terminated
// words exactly as strcmp() does.  The third argument is not used.
static int word_compar(const void *o1p, const void *o2p, void *dummyp) {
    const char *lhs=o1p, *rhs=o2p;
    (void) dummyp;
    return strcmp(lhs,rhs);
}


// qsort() comparison callback for `struct word_fq_s' elements: an element
// with a higher frequency sorts first; elements with equal frequencies are
// ordered by their words via strcmp().
static int word_fq_compar(const void *o1p, const void *o2p) {
    const struct word_fq_s *lhs=o1p;
    const struct word_fq_s *rhs=o2p;
    const int fq_diff=rhs->fq-lhs->fq;
    return fq_diff ? fq_diff : strcmp(lhs->word,rhs->word);
}


// Program entry point: count words in a text file and print a table of word
// frequencies in descending order of frequency.
//
// argv[1] is the name of the input text file (required).  argv[2], if given,
// must be a positive decimal integer that limits how many of the most
// frequent words are printed.  Returns 0 on success or 1 on any error, after
// printing a diagnostic message to stderr.
int main(int argc, char **argv) {
    const char *fln_text;
    char *word_p=0;
    int ii, nword, word_sz=0, word_allo=2, exit_code=1;
    long n_word_top=0;  // 0 means "no limit"
    FILE *file_text_p=0;
    struct word_fq_s *word_fq_p=0;
    if (argc<2) ERREXIT("input file not specified");
    fln_text=argv[1];
    if (!(file_text_p=fopen(fln_text,"r")))
        ERREXIT("%s: failed to open the file",fln_text);
    if (argc>2) {
        // strtol() lets us reject empty input and trailing garbage such as
        // "10abc".  A value too large for `long' is clamped by strtol() to
        // LONG_MAX, which simply acts as "print everything".
        char *end_p;
        n_word_top=strtol(argv[2],&end_p,10);
        if (end_p==argv[2] || *end_p || n_word_top<1)
            ERREXIT("invalid number of the most frequent words");
    }
    // NOTE(review): the key size of -1 presumably tells the map that keys are
    // stored as pointers rather than copied by value -- confirm against the
    // QSMM documentation.  Values are `int' counters.
    if (!(word_p=calloc(word_allo,sizeof(*word_p))) ||
        !(iter_word_to_fq=qsmm_map_iter_create()) ||
        !(map_word_to_fq=
          qsmm_map_create_sz(-1,sizeof(int),&word_compar,0)))
        ERREXIT("out of memory");
    // Read the file character by character, accumulating the current word in
    // `word_p' (always NUL-terminated) and registering it when it ends.
    while (1) {
        int cc=fgetc(file_text_p);
        if (cc==EOF) {
            if (ferror(file_text_p))
                ERREXIT("%s: failed to read the file",fln_text);
            // The last word may be terminated by end of file.
            if (word_sz>0 && word_add(word_p)<0) ERREXIT("out of memory");
            break;
        }
        if (isalnum(cc)) {
            // Need room for the new character and the terminating NUL.
            if (word_sz+2>word_allo) {
                char *new_p;
                int allo=word_allo*3/2;
                if (!(new_p=realloc(word_p,allo)))
                    ERREXIT("out of memory");
                word_p=new_p;
                word_allo=allo;
            }
            word_p[word_sz++]=cc;
            word_p[word_sz]=0;
        }
        else if (word_sz>0) {
            if (word_add(word_p)<0) ERREXIT("out of memory");
            *word_p=0;
            word_sz=0;
        }
    }
    nword=qsmm_map_size(map_word_to_fq);
    // calloc() is allowed to return a null pointer for a zero-size request,
    // so always ask for at least one element; otherwise an input without
    // words would be misreported as an out-of-memory condition.
    if (!(word_fq_p=calloc(nword>0 ? nword : 1,sizeof(*word_fq_p))))
        ERREXIT("out of memory");
    // Copy the (word, frequency) pairs out of the map into a flat array.
    for (ii=0, qsmm_map_iter_begin(map_word_to_fq,iter_word_to_fq);
         !qsmm_map_iter_is_end(map_word_to_fq,iter_word_to_fq);
         ii++, qsmm_map_iter_next(iter_word_to_fq)) {
        word_fq_p[ii].word=qsmm_map_iter_key(iter_word_to_fq);
        word_fq_p[ii].fq=*((int *) qsmm_map_iter_val(iter_word_to_fq));
    }
    qsort(word_fq_p,nword,sizeof(*word_fq_p),&word_fq_compar);
    for (ii=0; ii<nword; ii++) {
        // Check the limit before printing so that exactly `n_word_top' rows
        // are printed rather than one more.
        if (n_word_top>0 && ii>=n_word_top) break;
        printf("%d\t%s\n", word_fq_p[ii].fq, word_fq_p[ii].word);
    }
    exit_code=0;

Exit:
    if (map_word_to_fq) {
        // The keys are heap copies made by word_add(); release them before
        // destroying the map itself.
        if (iter_word_to_fq)
            for (qsmm_map_iter_begin(map_word_to_fq,iter_word_to_fq);
                 !qsmm_map_iter_is_end(map_word_to_fq,iter_word_to_fq);
                 qsmm_map_iter_next(iter_word_to_fq))
                free(qsmm_map_iter_key(iter_word_to_fq));
        qsmm_map_destroy(map_word_to_fq);
    }
    if (iter_word_to_fq) qsmm_map_iter_destroy(iter_word_to_fq);
    if (file_text_p) fclose(file_text_p);
    free(word_fq_p);
    free(word_p);
    return exit_code;
}

Sample program output is given below. The program is invoked on its own source text with 10 as the requested number of most frequent words.

$ ./fqtab fqtab.c 10
86      word
52      fq
32      p
32      to
32      iter
32      map
23      0
23      if
21      qsmm
12      int
11      ii

Note that the word frequencies calculated for the text of the example program as presented in this subsection will differ from those calculated for the file samples/fqtab.c in the package distribution, because that file begins with a license block.


Previous: , Up: The Implementation of Functionality of STL map Template   [Contents][Index]