00001
00002
00003
00004
00005
00006
00007
00008
00009
00010
00011
00012
00013
00014
00015
00016
00017
00018
00019
00020
00021
00022
00023
00024
00025
00026
00027
00028
00029
00030
00031
00032
00033
00034
00035
00036
00037
#ifndef lint
/*
 * Copyright notice kept in a static string; the "@(#)" prefix is part of
 * the text. The definition is compiled out when `lint` is defined.
 */
static char copyright[] =
    "@(#) Copyright (c) 2002\n"
    "Netherlands Forensic Institute. All rights reserved.\n";
#endif
00043
00044 #ifdef HAVE_CONFIG_H
00045 #include <config.h>
00046 #endif
00047
00048 #include <math.h>
00049 #include <string.h>
00050 #include <stdio.h>
00051 #include <unistd.h>
00052
00053 #include "rdd.h"
00054 #include "rdd_internals.h"
00055 #include "error.h"
00056 #include "writer.h"
00057 #include "filter.h"
00058 #include "msgprinter.h"
00059
/* Number of distinct values a byte can take; size of the per-block histogram. */
#define NUM_BYTE_VAL 256

/* Natural logarithm of 2; used to turn log() results into base-2 logarithms. */
#define RDD_LN2 0.69314718055994530942
00063
00064 typedef struct _RDD_STATS_BLOCKFILTER {
00065 rdd_count_t blocknum;
00066 unsigned histogram[NUM_BYTE_VAL];
00067 unsigned minbyte;
00068 unsigned maxbyte;
00069 char *path;
00070 RDD_MSGPRINTER *printer;
00071 } RDD_STATS_BLOCKFILTER;
00072
00073 static int stats_input(RDD_FILTER *f,
00074 const unsigned char *buf, unsigned nbyte);
00075 static int stats_block(RDD_FILTER *f, unsigned nbyte);
00076 static int stats_close(RDD_FILTER *f);
00077 static int stats_free(RDD_FILTER *f);
00078
00079 static RDD_FILTER_OPS stats_ops = {
00080 stats_input,
00081 stats_block,
00082 stats_close,
00083 0,
00084 stats_free
00085 };
00086
00087
00088 int
00089 rdd_new_stats_blockfilter(RDD_FILTER **self, unsigned blocksize,
00090 const char *outpath, const int force_overwrite)
00091 {
00092 RDD_FILTER *f = 0;
00093 RDD_STATS_BLOCKFILTER *state = 0;
00094 char *path = 0;
00095 RDD_MSGPRINTER *prn = 0;
00096 int rc;
00097
00098 rc = rdd_new_filter(&f, &stats_ops, sizeof(RDD_STATS_BLOCKFILTER),
00099 blocksize);
00100 if (rc != RDD_OK) {
00101 goto error;
00102 }
00103 state = (RDD_STATS_BLOCKFILTER *) f->state;
00104
00105 if ((path = malloc(strlen(outpath) + 1)) == 0) {
00106 rc = RDD_NOMEM;
00107 goto error;
00108 }
00109 strcpy(path, outpath);
00110
00111 if ((rc = rdd_mp_open_file_printer(&prn, outpath, force_overwrite)) != RDD_OK) {
00112 goto error;
00113 }
00114
00115 state->blocknum = 0;
00116 memset(state->histogram, 0, sizeof(state->histogram));
00117 state->minbyte = NUM_BYTE_VAL - 1;
00118 state->maxbyte = 0;
00119 state->path = path;
00120 state->printer = prn;
00121
00122 *self = f;
00123 return RDD_OK;
00124
00125 error:
00126 *self = 0;
00127 if (path != 0) free(path);
00128 if (state != 0) free(state);
00129 if (f != 0) free(f);
00130 return rc;
00131 }
00132
00139 static void
00140 compute_histogram_stats(RDD_STATS_BLOCKFILTER *state,
00141 unsigned block_size,
00142 double *entropy,
00143 unsigned *modus_byteval, unsigned *modus_count)
00144 {
00145 unsigned byte, count;
00146 unsigned mval, mcount;
00147 double p, ent;
00148
00149 ent = 0.0;
00150 mval = 0;
00151 mcount = state->histogram[mval];
00152
00153 for (byte = 0; byte < NUM_BYTE_VAL; byte++) {
00154 count = state->histogram[byte];
00155 if (count > 0) {
00156 p = ((double) count) / ((double) block_size);
00157 ent += -p * (log(p) / RDD_LN2);
00158 }
00159 if (count > mcount) {
00160 mval = byte;
00161 mcount = count;
00162 }
00163 }
00164
00165 *entropy = ent;
00166 *modus_byteval = mval;
00167 *modus_count = mcount;
00168 }
00169
00173 static int
00174 stats_input(RDD_FILTER *f, const unsigned char *buf, unsigned nbyte)
00175 {
00176 RDD_STATS_BLOCKFILTER *state = (RDD_STATS_BLOCKFILTER *) f->state;
00177 unsigned byte;
00178 unsigned i;
00179
00180 for (i = 0; i < nbyte; i++) {
00181 byte = buf[i];
00182 state->histogram[byte]++;
00183
00184 if (byte < state->minbyte) state->minbyte = byte;
00185 if (byte > state->maxbyte) state->maxbyte = byte;
00186 }
00187
00188 return RDD_OK;
00189 }
00190
00194 static int
00195 stats_block(RDD_FILTER *f, unsigned nbyte)
00196 {
00197 RDD_STATS_BLOCKFILTER *state = (RDD_STATS_BLOCKFILTER *) f->state;
00198 unsigned modus, fmodus;
00199 double entropy;
00200
00201 compute_histogram_stats(state, nbyte, &entropy, &modus, &fmodus);
00202
00203 rdd_mp_message(state->printer, RDD_MSG_INFO,
00204 "%llu\t%u\t%u\t%u\t%u\t%lf",
00205 state->blocknum,
00206 state->minbyte, state->maxbyte,
00207 modus, fmodus,
00208 entropy);
00209
00210 state->blocknum++;
00211 memset(state->histogram, 0, sizeof(state->histogram));
00212 state->minbyte = NUM_BYTE_VAL - 1;
00213 state->maxbyte = 0;
00214
00215 return RDD_OK;
00216 }
00217
00218 static int
00219 stats_close(RDD_FILTER *f)
00220 {
00221 RDD_STATS_BLOCKFILTER *state = (RDD_STATS_BLOCKFILTER *) f->state;
00222 int rc;
00223
00224 rc = rdd_mp_close(state->printer, RDD_MP_RECURSE|RDD_MP_READONLY);
00225 if (rc != RDD_OK) {
00226 return rc;
00227 }
00228
00229 return RDD_OK;
00230 }
00231
00232 static int
00233 stats_free(RDD_FILTER *f)
00234 {
00235 RDD_STATS_BLOCKFILTER *state = (RDD_STATS_BLOCKFILTER *) f->state;
00236
00237 free(state->path);
00238
00239 return RDD_OK;
00240 }