diff options
| -rw-r--r-- | src/mem_internal_check.c | 245 |
1 files changed, 221 insertions, 24 deletions
diff --git a/src/mem_internal_check.c b/src/mem_internal_check.c index e3b60a5..080230c 100644 --- a/src/mem_internal_check.c +++ b/src/mem_internal_check.c @@ -13,11 +13,14 @@ #include <stdlib.h> #include <string.h> #include <unistd.h> +#include <fcntl.h> #include <stdbool.h> #include <stdint.h> #include <errno.h> #include <ctype.h> #include <inttypes.h> +#include <sys/mman.h> +#include <sys/stat.h> #include "output.h" #include "hex_conversion.h" @@ -55,11 +58,14 @@ struct data_array { void apply_filter_to_array(struct data_field *array, unsigned char *data); int compare_data_fields(const void *a, const void *b); int compare_string_data_field(const void *a, const void *b); +bool dump_array(char *target_filename, struct data_array *da); void fprint_array(FILE *fd, struct data_array *array); struct data_array *import_file_into_array(char *filename); bool is_valid_raw_data(char *s, bool cut); +struct data_array *load_dumped_array(char *dump_filename, char *plain_filename); bool run_file_on_filter_array(struct data_array *da, char *filename, FILE *output); bool search_data_in_array(struct data_array *da, unsigned char *data); +void set_entry_points(struct data_array *da); void sort_array(struct data_array *da); @@ -116,15 +122,15 @@ inline bool search_data_in_array(struct data_array *array, unsigned char *data) inline void sort_array(struct data_array *da) { - size_t i = 0, p = 0; - struct data_field *f; - if (da == NULL) return; - qsort(da->fields, da->length, sizeof(struct data_field), compare_data_fields); + set_entry_points(da); +} + - /* set entry points */ - f = da->fields; +inline void set_entry_points(struct data_array *da) { + size_t i = 0, p = 0; + struct data_field *f = da->fields; for (i=1,p=0; i<da->length; i++, p++) { if (f[p].data[0] != f[i].data[0]) { da->entry_points[f[i].data[0]] = i; @@ -163,6 +169,113 @@ inline bool is_valid_raw_data(char *s, bool cut) { return true; } +/* Performance improvement dump the array into an binary file. 
+ * dump_array() returns true only if all da->length fields were written; + * after a short write it tries to remove the file and returns false. + * On load the dump is only imported while its mtime (seconds) is strictly + * newer than the plain file's. The caller skips re-dumping a loaded dump. + * The dump is mapped via mmap into memory and NOT directly loaded with + * an fread loop or similar. So the program relies on the FS cache when + * set_entry_points is called. + */ +inline bool dump_array(char *target_filename, struct data_array *da) { + FILE *fd = NULL; + size_t written = 0; + + if (target_filename == NULL || da == NULL || da->fields == NULL) { + LOGERR("ERROR: target_filename %s / data_array %s / data_field %s\n", + ((target_filename==NULL) ? "NULL" : target_filename), + ((da==NULL) ? "NULL" : "given"), + ((da==NULL || da->fields==NULL) ? "NULL" : "given")); /* da itself may be NULL here */ + return false; + } + + if ((fd=fopen(target_filename, "w")) == NULL) { + LOGERR("ERROR: Failed to open file %s to write binary: %s (errno %d)\n", + target_filename, strerror(errno), errno); + return false; + } + /* No per-element check while writing. 
Instead the element count + * returned by fwrite is compared with da->length afterwards. */ + written = fwrite(da->fields, sizeof(struct data_field), da->length, fd); + DBGTRC("DEBUG: Written %lu elements\n", (unsigned long)written); + fflush(fd); + fclose(fd); + + if (written != da->length) { + LOGERR("ERROR: Failed to write %lu elements, wrote %lu\n", + (unsigned long)da->length, (unsigned long)written); + if (unlink(target_filename) != 0) { + LOGERR("ERROR: failed to remove file %s: %s (errno %d)\n", + target_filename, strerror(errno), errno); + } + return false; + } + return true; +} +/* Map dump_filename read-only when its mtime (seconds) is strictly newer than + * plain_filename's. Returns NULL on error or for a stale or empty dump. */ +struct data_array *load_dumped_array(char *dump_filename, char *plain_filename) { + int fdin=-1; + size_t fsize = 0; + struct data_array *da = NULL; + struct stat stat_dump, stat_plain; + + if (dump_filename == NULL || plain_filename == NULL) { + LOGERR("ERROR: dump_filename %s / plain_filename %s\n", + ((dump_filename==NULL)?"NULL":dump_filename), + ((plain_filename==NULL)?"NULL":plain_filename)); + return NULL; + } + + if (stat(dump_filename, &stat_dump) != 0) { + LOGERR("ERROR: failed to get stat() data on %s: %s (errno %d)\n", + dump_filename, strerror(errno), errno); + return NULL; + } + if (stat(plain_filename, &stat_plain) != 0) { + LOGERR("ERROR: failed to get stat() data on %s: %s (errno %d)\n", + plain_filename, strerror(errno), errno); + return NULL; + } + + if (stat_plain.st_mtim.tv_sec >= stat_dump.st_mtim.tv_sec) { + return NULL; + } + + if ((da=calloc(1,sizeof(struct data_array))) == NULL) { + LOGERR("ERROR: Failed to allocate a few bytes.\n"); + return NULL; + } + + if (stat_dump.st_size <= 0) { + free(da); + return NULL; + } + fsize = (size_t)stat_dump.st_size; + da->length = fsize/sizeof(struct data_field); + + if ((fdin=open(dump_filename, O_RDONLY )) < 0) { + LOGERR("ERROR: Failed to open file %s to read binary: %s (errno %d)\n", + dump_filename, strerror(errno), errno); + free(da); + return NULL; + } + + da->fields = mmap(0, fsize, PROT_READ, MAP_PRIVATE, fdin, 0); + if (da->fields == MAP_FAILED) { + LOGERR("ERROR: 
Failed to map file %s into memory: %s (errno %d)\n", + dump_filename, strerror(errno), errno); + close(fdin); + free(da); + return NULL; + } + set_entry_points(da); /* NOTE(review): presumably da->entry_points is an in-struct array that is usable right after calloc; confirm against struct data_array */ + close(fdin); + + return da; +} + struct data_array *import_file_into_array(char *filename) { FILE *fd = NULL; @@ -190,6 +303,8 @@ struct data_array *import_file_into_array(char *filename) { } fclose(fd); + DBGTRC("DEBUG: valid lines %lu\n", line_nr); + /* only complete close and open worked reliably, let's just hope nobody modified the file * TODO: compare stat() mtime? */ if ((fd = fopen(filename, "r")) == NULL) { @@ -303,44 +418,122 @@ void fprint_array(FILE *fd, struct data_array *da) { int main(int argc, char **argv) { FILE *output = NULL; - int i = 3; - size_t s = 0; + int first_data = 2, opt = 0, filter_index = 1, output_index = 0; + size_t s = 0; + bool work_with_dump = false, map_from_dump = false; struct data_array *array; struct timespec t1, t2, tdiff; + struct stat stat_dump, stat_plain; + char dump_fname[4096] = ""; if (argc < 3) { fprintf(stderr, "Usage: %s output filter_file data_file...\n\n", argv[0]); + fprintf(stderr, "or %s -b output filter_file data_file...\n\n", argv[0]); fprintf(stderr, "Loads filters into memory, does NOT remove duplicates\n"); + fprintf(stderr, "The -b argument loads and/or dumps to filter_file.dump\n" + "in case the file is older than the dump.\n"); return EXIT_FAILURE; } - s = strlen(argv[1]); + memset(dump_fname, '\0', 4096); + + while ((opt = getopt(argc, argv, "b")) != -1) { + switch (opt) { + case 'b': + work_with_dump = true; + map_from_dump = true; + DBGTRC("DEBUG: enabled mapping and dumping\n"); + break; + default: + LOGERR("ERROR: Unknown option %c.\n", opt); + exit(EXIT_FAILURE); + }; + } + + output_index += optind; + filter_index += optind; + first_data += optind; + if (filter_index >= argc) { LOGERR("ERROR: Missing output and/or filter_file argument.\n"); return EXIT_FAILURE; } /* options may have consumed the operands the argc < 3 test counted */ + DBGTRC("DEBUG: filter index %d / i %d / optind %d / output_index %d\n", + filter_index, first_data, optind, output_index); + DBGTRC("DEBUG: output_file %s\n", argv[output_index]); + 
DBGTRC("DEBUG: filter_file %s\n", argv[filter_index]); + DBGTRC("DEBUG: first data %s\n", (first_data < argc) ? argv[first_data] : "(none)"); + + /* Memory dump and map check */ + if (work_with_dump) { + s = strlen(argv[filter_index]); + if (s > 0 && s < 4089) { /* leaves room for ".dump" and the terminator */ + memcpy(dump_fname, argv[filter_index], s); + dump_fname[s] = '.'; + dump_fname[s+1] = 'd'; + dump_fname[s+2] = 'u'; + dump_fname[s+3] = 'm'; + dump_fname[s+4] = 'p'; + dump_fname[s+5] = '\0'; + } else { + work_with_dump = false; + map_from_dump = false; + } + + if (stat(dump_fname, &stat_dump) != 0) { + memset(&stat_dump, 0, sizeof(stat_dump)); /* First time there's no dump: give the mtime comparisons a defined value */ + DBGTRC("DEBUG: deactivate mapping due to stat failed on dump '%s'\n", + dump_fname); + map_from_dump = false; + } + if (stat(argv[filter_index], &stat_plain) != 0) { + LOGERR("ERROR: failed to get stat() data on %s: %s (errno %d)\n", + argv[filter_index], strerror(errno), errno); + exit(EXIT_FAILURE); + } + + if (stat_plain.st_mtim.tv_sec >= stat_dump.st_mtim.tv_sec) { + map_from_dump = false; + } + } + + s = strlen(argv[output_index]); /* NOTE(review): the unchanged lines that follow still use argv[1] for the stdout test and the fopen error message; with -b that need not be the output name */ if (s == 6 && (strncmp("stdout", argv[1], 6) == 0)) { output = stdout; + work_with_dump = false; } else { - if ((output=fopen(argv[1], "w")) == NULL) { + if ((output=fopen(argv[output_index], "w")) == NULL) { LOGERR("ERROR: Failed to open file '%s': %s (errno %d)\n", argv[1], strerror(errno), errno); return EXIT_FAILURE; } } - LOGERR("IMPORT FILE %s\n", argv[2]); - array = import_file_into_array(argv[2]); - if (array == NULL) { - return EXIT_FAILURE; + if (map_from_dump) { + LOGERR("MAP from file %s\n", dump_fname); + array = load_dumped_array(dump_fname, argv[filter_index]); + if (array == NULL) { + return EXIT_FAILURE; + } + } else { + LOGERR("IMPORT FILE %s\n", argv[filter_index]); + array = import_file_into_array(argv[filter_index]); + if (array == NULL) { + return EXIT_FAILURE; + } + LOGERR("run qsort on in-memory data\n"); + TU_MEASURE_TIME( CLOCK_PROCESS_CPUTIME_ID, &t1, &t2, + sort_array(array); + ); + difftime_timespec(t1, t2, &tdiff); + 
LOGERR("Sorted in %lu s and %lu ns process CPU time\n", tdiff.tv_sec, tdiff.tv_nsec ); } - LOGERR("run qsort on in-memory data\n"); - TU_MEASURE_TIME( CLOCK_PROCESS_CPUTIME_ID, &t1, &t2, - sort_array(array); - ); - difftime_timespec(t1, t2, &tdiff); - LOGERR("Sorted in %lu s and %lu ns process CPU time\n", tdiff.tv_sec, tdiff.tv_nsec ); + if (work_with_dump && + (stat_plain.st_mtim.tv_sec >= stat_dump.st_mtim.tv_sec)) { + LOGERR("DUMP filter to %s\n", dump_fname); + dump_array(dump_fname, array); + } - for (i=3; i<argc; i++) { - LOGERR("APPLY FILTER ON FILE %s\n", argv[i]); - if (!run_file_on_filter_array(array, argv[i], output)) { + for (; first_data<argc; first_data++) { + LOGERR("APPLY FILTER ON FILE %s\n", argv[first_data]); + if (!run_file_on_filter_array(array, argv[first_data], output)) { LOGERR("ERROR: Aborting.\n"); return EXIT_FAILURE; } @@ -348,7 +541,11 @@ int main(int argc, char **argv) { fflush(output); fclose(output); - free(array->fields); + if ( ! map_from_dump ) { + free(array->fields); + } else { + munmap(array->fields, (array->length*sizeof(struct data_field))); + } free(array); return EXIT_SUCCESS; |
