aboutsummaryrefslogtreecommitdiff
path: root/src
diff options
context:
space:
mode:
Diffstat (limited to 'src')
-rw-r--r-- src/mem_internal_check.c 245
1 files changed, 221 insertions, 24 deletions
diff --git a/src/mem_internal_check.c b/src/mem_internal_check.c
index e3b60a5..080230c 100644
--- a/src/mem_internal_check.c
+++ b/src/mem_internal_check.c
@@ -13,11 +13,14 @@
#include <stdlib.h>
#include <string.h>
#include <unistd.h>
+#include <fcntl.h>
#include <stdbool.h>
#include <stdint.h>
#include <errno.h>
#include <ctype.h>
#include <inttypes.h>
+#include <sys/mman.h>
+#include <sys/stat.h>
#include "output.h"
#include "hex_conversion.h"
@@ -55,11 +58,14 @@ struct data_array {
void apply_filter_to_array(struct data_field *array, unsigned char *data);
int compare_data_fields(const void *a, const void *b);
int compare_string_data_field(const void *a, const void *b);
+bool dump_array(char *target_filename, struct data_array *da);
void fprint_array(FILE *fd, struct data_array *array);
struct data_array *import_file_into_array(char *filename);
bool is_valid_raw_data(char *s, bool cut);
+struct data_array *load_dumped_array(char *dump_filename, char *plain_filename);
bool run_file_on_filter_array(struct data_array *da, char *filename, FILE *output);
bool search_data_in_array(struct data_array *da, unsigned char *data);
+void set_entry_points(struct data_array *da);
void sort_array(struct data_array *da);
@@ -116,15 +122,15 @@ inline bool search_data_in_array(struct data_array *array, unsigned char *data)
inline void sort_array(struct data_array *da) {
- size_t i = 0, p = 0;
- struct data_field *f;
-
if (da == NULL) return;
-
qsort(da->fields, da->length, sizeof(struct data_field), compare_data_fields);
+ set_entry_points(da);
+}
+
- /* set entry points */
- f = da->fields;
+inline void set_entry_points(struct data_array *da) {
+ size_t i = 0, p = 0;
+ struct data_field *f = da->fields;
for (i=1,p=0; i<da->length; i++, p++) {
if (f[p].data[0] != f[i].data[0]) {
da->entry_points[f[i].data[0]] = i;
@@ -163,6 +169,113 @@ inline bool is_valid_raw_data(char *s, bool cut) {
return true;
}
+/* Performance improvement: dump the array into a binary file.
+ * When loading it, compare the mtime on the FS. As long as the binary
+ * is in a more recent state, import the data from there. Check in the
+ * calling function whether the dump was reloaded; if so, skip another
+ * dump.
+ * The dump is mapped via mmap into the memory and NOT directly loaded
+ * with an fread loop or similar. So the program relies on the FS cache
+ * when set_entry_points is called.
+ *
+ * dump_array() writes da->length elements of struct data_field, taken
+ * from da->fields, to target_filename (the file is truncated/created).
+ *
+ * Returns true when every element was written. Returns false when an
+ * argument is NULL, when the file cannot be opened, or on a short write;
+ * after a short write the incomplete file is unlinked again.
+ */
+inline bool dump_array(char *target_filename, struct data_array *da) {
+    FILE *fd = NULL;
+    size_t written = 0;
+
+    if (target_filename == NULL || da == NULL || da->fields == NULL) {
+        /* da itself may be NULL on this path, so da->fields must only be
+         * inspected after da has been checked */
+        LOGERR("ERROR: target_filename %s / data_array %s / data_field %s\n",
+            ((target_filename==NULL) ? "NULL" : target_filename),
+            ((da==NULL) ? "NULL" : "given"),
+            ((da==NULL || da->fields==NULL) ? "NULL" : "given"));
+        return false;
+    }
+
+    if ((fd=fopen(target_filename, "w")) == NULL) {
+        LOGERR("ERROR: Failed to open file %s to write binary: %s (errno %d)\n",
+            target_filename, strerror(errno), errno);
+        return false;
+    }
+    /* For performance the array is written with a single fwrite() call and
+     * no per-element check; the element count is compared afterwards. */
+    written = fwrite(da->fields, sizeof(struct data_field), da->length, fd);
+    DBGTRC("DEBUG: Written %lu elements\n", written);
+    fflush(fd);
+    fclose(fd);
+
+    if (written != da->length) {
+        LOGERR("ERROR: Failed to write %lu elements, wrote %lu\n",
+            da->length, written);
+        /* do not leave a partial dump behind for a later run to pick up */
+        if (unlink(target_filename) != 0) {
+            LOGERR("ERROR: failed to remove file %s: %s (errno %d)\n",
+                target_filename, strerror(errno), errno);
+        }
+        return false;
+    }
+    return true;
+}
+
+
+/* Map a binary dump (as written by dump_array) read-only into memory and
+ * wrap it in a freshly allocated struct data_array.
+ *
+ * The dump is only used when its mtime, compared in whole seconds, is
+ * strictly newer than the mtime of plain_filename; otherwise NULL is
+ * returned without logging.
+ *
+ * Returns NULL as well when an argument is NULL, when stat()/open()/mmap()
+ * or the allocation fails, when the dump is empty, or when its size is not
+ * a multiple of sizeof(struct data_field).
+ *
+ * On success da->fields points into the mapping, so it has to be released
+ * with munmap() and not with free(); da itself comes from calloc().
+ */
+struct data_array *load_dumped_array(char *dump_filename, char *plain_filename) {
+    int fdin = -1;
+    size_t fsize = 0;
+    struct data_array *da = NULL;
+    struct stat stat_dump, stat_plain;
+
+    if (dump_filename == NULL || plain_filename == NULL) {
+        LOGERR("ERROR: dump_filename %s / plain_filename %s\n",
+            ((dump_filename==NULL)?"NULL":dump_filename),
+            ((plain_filename==NULL)?"NULL":plain_filename));
+        return NULL;
+    }
+
+    if (stat(dump_filename, &stat_dump) != 0) {
+        LOGERR("ERROR: failed to get stat() data on %s: %s (errno %d)\n",
+            dump_filename, strerror(errno), errno);
+        return NULL;
+    }
+    if (stat(plain_filename, &stat_plain) != 0) {
+        LOGERR("ERROR: failed to get stat() data on %s: %s (errno %d)\n",
+            plain_filename, strerror(errno), errno);
+        return NULL;
+    }
+
+    /* the dump is stale unless it is strictly newer than the plain file */
+    if (stat_plain.st_mtim.tv_sec >= stat_dump.st_mtim.tv_sec) {
+        return NULL;
+    }
+
+    /* validate the size before allocating, so nothing has to be freed */
+    if (stat_dump.st_size <= 0) {
+        return NULL;
+    }
+    fsize = (size_t)stat_dump.st_size;
+
+    /* Trailing bytes mean the file was truncated or written with another
+     * element layout; mapping it would silently drop a partial record. */
+    if (fsize % sizeof(struct data_field) != 0) {
+        LOGERR("ERROR: Dump file %s has %zu bytes, not a multiple of the element size %zu\n",
+            dump_filename, fsize, sizeof(struct data_field));
+        return NULL;
+    }
+
+    if ((da=calloc(1,sizeof(struct data_array))) == NULL) {
+        LOGERR("ERROR: Failed to allocate a few bytes.\n");
+        return NULL;
+    }
+    da->length = fsize/sizeof(struct data_field);
+
+    if ((fdin=open(dump_filename, O_RDONLY )) < 0) {
+        LOGERR("ERROR: Failed to open file %s to read binary: %s (errno %d)\n",
+            dump_filename, strerror(errno), errno);
+        free(da);
+        return NULL;
+    }
+
+    da->fields = mmap(0, fsize, PROT_READ, MAP_PRIVATE, fdin, 0);
+    if (da->fields == MAP_FAILED) {
+        LOGERR("ERROR: Failed to map file %s into memory: %s (errno %d)\n",
+            dump_filename, strerror(errno), errno);
+        close(fdin);
+        free(da);
+        return NULL;
+    }
+    set_entry_points(da);
+    /* closing the descriptor does not remove the mapping */
+    close(fdin);
+
+    return da;
+}
+
struct data_array *import_file_into_array(char *filename) {
FILE *fd = NULL;
@@ -190,6 +303,8 @@ struct data_array *import_file_into_array(char *filename) {
}
fclose(fd);
+ DBGTRC("DEBUG: valid lines %lu\n", line_nr);
+
/* only complete close and open worked reliably, let's just hope nobody modified the file
* TODO: compare stat() mtime? */
if ((fd = fopen(filename, "r")) == NULL) {
@@ -303,44 +418,122 @@ void fprint_array(FILE *fd, struct data_array *da) {
int main(int argc, char **argv) {
FILE *output = NULL;
- int i = 3;
- size_t s = 0;
+ int first_data = 2, opt = 0, filter_index = 1, output_index = 0;
+ size_t s = 0;
+ bool work_with_dump = false, map_from_dump = false;
struct data_array *array;
struct timespec t1, t2, tdiff;
+ struct stat stat_dump, stat_plain;
+ char dump_fname[4096] = "";
if (argc < 3) {
fprintf(stderr, "Usage: %s output filter_file data_file...\n\n", argv[0]);
+ fprintf(stderr, "or %s -b output filter_file data_file...\n\n", argv[0]);
fprintf(stderr, "Loads filters into memory, does NOT remove duplicates\n");
+ fprintf(stderr, "The -b argument loads and/or dumps to filter_file.dump\n"
+ "in case the file is older than the dump.\n");
return EXIT_FAILURE;
}
- s = strlen(argv[1]);
+ memset(dump_fname, '\0', 4096);
+
+ while ((opt = getopt(argc, argv, "b")) != -1) {
+ switch (opt) {
+ case 'b':
+ work_with_dump = true;
+ map_from_dump = true;
+ DBGTRC("DEBUG: enabled mapping and dumping\n");
+ break;
+ default:
+ LOGERR("ERROR: Unknown option %c.\n", opt);
+ exit(EXIT_FAILURE);
+ };
+ }
+
+ output_index += optind;
+ filter_index += optind;
+ first_data += optind;
+
+ DBGTRC("DEBUG: filter index %d / i %d / optind %d / output_index %d\n",
+ filter_index, first_data, optind, output_index);
+ DBGTRC("DEBUG: output_file %s\n", argv[output_index]);
+ DBGTRC("DEBUG: filter_file %s\n", argv[filter_index]);
+ DBGTRC("DEBUG: first data %s\n", argv[first_data]);
+
+ /* Memory dump and map check */
+ if (work_with_dump) {
+ memcpy(dump_fname, argv[filter_index], strlen(argv[filter_index]));
+ s = strlen(dump_fname);
+ if (s > 0 && s < 4089) {
+ dump_fname[s] = '.';
+ dump_fname[s+1] = 'd';
+ dump_fname[s+2] = 'u';
+ dump_fname[s+3] = 'm';
+ dump_fname[s+4] = 'p';
+ dump_fname[s+5] = '\0';
+ } else {
+ work_with_dump = false;
+ map_from_dump = false;
+ }
+
+ if (stat(dump_fname, &stat_dump) != 0) {
+ /* First time there's no dump */
+ DBGTRC("DEBUG: deactivate mapping due to stat failed on dump '%s'\n",
+ dump_fname);
+ map_from_dump = false;
+ }
+ if (stat(argv[filter_index], &stat_plain) != 0) {
+ LOGERR("ERROR: failed to get stat() data on %s: %s (errno %d)\n",
+ argv[filter_index], strerror(errno), errno);
+ exit(EXIT_FAILURE);
+ }
+
+ if (stat_plain.st_mtim.tv_sec >= stat_dump.st_mtim.tv_sec) {
+ map_from_dump = false;
+ }
+ }
+
+ s = strlen(argv[output_index]);
if (s == 6 && (strncmp("stdout", argv[1], 6) == 0)) {
output = stdout;
+ work_with_dump = false;
} else {
- if ((output=fopen(argv[1], "w")) == NULL) {
+ if ((output=fopen(argv[output_index], "w")) == NULL) {
LOGERR("ERROR: Failed to open file '%s': %s (errno %d)\n",
argv[1], strerror(errno), errno);
return EXIT_FAILURE;
}
}
- LOGERR("IMPORT FILE %s\n", argv[2]);
- array = import_file_into_array(argv[2]);
- if (array == NULL) {
- return EXIT_FAILURE;
+ if (map_from_dump) {
+ LOGERR("MAP from file %s\n", dump_fname);
+ array = load_dumped_array(dump_fname, argv[filter_index]);
+ if (array == NULL) {
+ return EXIT_FAILURE;
+ }
+ } else {
+ LOGERR("IMPORT FILE %s\n", argv[filter_index]);
+ array = import_file_into_array(argv[filter_index]);
+ if (array == NULL) {
+ return EXIT_FAILURE;
+ }
+ LOGERR("run qsort on in-memory data\n");
+ TU_MEASURE_TIME( CLOCK_PROCESS_CPUTIME_ID, &t1, &t2,
+ sort_array(array);
+ );
+ difftime_timespec(t1, t2, &tdiff);
+ LOGERR("Sorted in %lu s and %lu ns process CPU time\n", tdiff.tv_sec, tdiff.tv_nsec );
}
- LOGERR("run qsort on in-memory data\n");
- TU_MEASURE_TIME( CLOCK_PROCESS_CPUTIME_ID, &t1, &t2,
- sort_array(array);
- );
- difftime_timespec(t1, t2, &tdiff);
- LOGERR("Sorted in %lu s and %lu ns process CPU time\n", tdiff.tv_sec, tdiff.tv_nsec );
+ if (work_with_dump &&
+ (stat_plain.st_mtim.tv_sec >= stat_dump.st_mtim.tv_sec)) {
+ LOGERR("DUMP filter to %s\n", dump_fname);
+ dump_array(dump_fname, array);
+ }
- for (i=3; i<argc; i++) {
- LOGERR("APPLY FILTER ON FILE %s\n", argv[i]);
- if (!run_file_on_filter_array(array, argv[i], output)) {
+ for (; first_data<argc; first_data++) {
+ LOGERR("APPLY FILTER ON FILE %s\n", argv[first_data]);
+ if (!run_file_on_filter_array(array, argv[first_data], output)) {
LOGERR("ERROR: Aborting.\n");
return EXIT_FAILURE;
}
@@ -348,7 +541,11 @@ int main(int argc, char **argv) {
fflush(output);
fclose(output);
- free(array->fields);
+ if ( ! map_from_dump ) {
+ free(array->fields);
+ } else {
+ munmap(array->fields, (array->length*sizeof(struct data_field)));
+ }
free(array);
return EXIT_SUCCESS;