diff options
| -rw-r--r-- | include/database_interaction.h | 3 | ||||
| -rw-r--r-- | include/file_processor.h | 4 | ||||
| -rw-r--r-- | src/database_interaction.c | 131 | ||||
| -rw-r--r-- | src/file_processor.c | 19 |
4 files changed, 156 insertions, 1 deletions
diff --git a/include/database_interaction.h b/include/database_interaction.h index 5ff6f66..c236cf2 100644 --- a/include/database_interaction.h +++ b/include/database_interaction.h @@ -29,8 +29,11 @@ int dbi_insert_hashes(const char *blake2, const char *sha256, const char *sha512 int64_t dbi_select_fileinfo_by_hash_path_filename_ids(int64_t hash_id, int64_t path_id, int64_t filename_id); int64_t dbi_select_fileinfo_by_path_filename_ids(int64_t pname_id, int64_t fname_id); +int dbi_select_fileinfo_by_id_resolved(int64_t id, struct df_fileinfo *fi); int dbi_insert_fileinfo(struct df_fileinfo *fi); +int dbi_fill_fileinfo(struct df_fileinfo *fi); + int dbi_update_fileinfo_last_seen(int64_t id); int dbi_update_fileinfo_complete(struct df_fileinfo *fi, int64_t existing_id); diff --git a/include/file_processor.h b/include/file_processor.h index 06b6d1d..e8f66f2 100644 --- a/include/file_processor.h +++ b/include/file_processor.h @@ -7,6 +7,8 @@ #ifndef FILE_PROCESSOR_H #define FILE_PROCESSOR_H +#include <stdint.h> +#include <time.h> #include <sys/stat.h> #define DF_BYTE_SIZE_256 32 @@ -44,7 +46,9 @@ struct df_fileinfo { unsigned char sha512[DF_BYTE_SIZE_512]; /**< The SHA512 hash in binary form. */ #endif struct df_hashstrings hashes; /**< The BLAKE2-512, SHA256 and SHA512 hash of the file */ + time_t last_seen; /**< time_t value when the file was last seen, filled by the database */ struct stat statbuf; /**< Result of lstat() call. Symlinks are to be ignored and filtered out earlier. */ + int64_t id; /**< to be set only when filled by the database */ }; diff --git a/src/database_interaction.c b/src/database_interaction.c index 5188a35..9e29c17 100644 --- a/src/database_interaction.c +++ b/src/database_interaction.c @@ -923,6 +923,136 @@ int dbi_insert_fileinfo(struct df_fileinfo *fi) { return rc; }; +/** + * Fill the given struct (path and filename required) with database content if available. 
+ * + * This can be used to speed up a rescan if the stat matches and last mtime was before last_seen + */ +int dbi_fill_fileinfo(struct df_fileinfo *fi) { + int rc = 0; + int64_t fname_id, pname_id, existing_entry = 0; + + + DBCONN_CHECK(-1); + + if (fi == NULL) { + LOGERR("ERROR: No fileinfo given.\n"); + return -1; + } + + fname_id = dbi_select_filename_by_name(fi->name); + pname_id = dbi_select_path_by_pathname(fi->path); + + /* Any problems with the selects? */ + if (fname_id <0 || pname_id <0) { + DBGTRC("ERROR: abort due to previous error.\n"); + return -1; + } + + /* A negative result is treated as "no matching entry": return 0 with fi->id reset to 0 */ + existing_entry = dbi_select_fileinfo_by_path_filename_ids(pname_id, fname_id); + if (existing_entry < 0) { + fi->id = 0; + return 0; + } + + /* Load the row with that id into fi via dbi_select_fileinfo_by_id_resolved() */ + rc = dbi_select_fileinfo_by_id_resolved(existing_entry, fi); + + return rc; +} + + + +int dbi_select_fileinfo_by_id_resolved(int64_t id, struct df_fileinfo *fi) { + int strc = 0; + size_t len; + char *tmp = NULL; + const unsigned char *txt = NULL; + sqlite3_stmt *st = select_fileinfo_by_id_resolved; + + + DBCONN_CHECK(-1); + + if (id <=0) { + LOGERR("ERROR: No id given.\n"); + return -1; + } + + if (fi == NULL) { + LOGERR("ERROR: No fileinfo struct given.\n"); + return -1; + } + + + sqlite3_clear_bindings(st); + sqlite3_reset(st); + + if (sqlite3_bind_int64(st, 1,id) != SQLITE_OK) { + LOGERR("ERROR: Failed to bind id to prepared statement: %s\n", sqlite3_errmsg(dbconn)); + return -1; + } + + strc = sqlite3_step(st); + + if (strc == SQLITE_DONE) { + return -1; + } + + if (strc != SQLITE_ROW) { + LOGERR("ERROR: Failed step: %s\n", sqlite3_errmsg(dbconn)); + return -1; + } + + if (fi->path == NULL) { + txt = sqlite3_column_text(st, 0); /* paths.pathname */ + len = (size_t)sqlite3_column_bytes(st, 0); + if ((tmp = calloc(len+1, sizeof(char))) == NULL) { + LOGERR("ERROR: Failed to allocate a few bytes memory\n"); + return -1; + } + 
memcpy(tmp, txt, len); + fi->path = tmp; + } + + if (fi->name == NULL) { + txt = sqlite3_column_text(st, 1); /* filenames.name */ + len = (size_t)sqlite3_column_bytes(st, 1); + if ((tmp = calloc(len+1, sizeof(char))) == NULL) { + LOGERR("ERROR: Failed to allocate a few bytes memory\n"); + return -1; + } + memcpy(tmp, txt, len); + fi->name = tmp; /* the filename copy goes into fi->name; fi->path must stay untouched here */ + } + + txt = sqlite3_column_text(st, 2); /* hashes.blake2 */ + memcpy(fi->hashes.blake2, txt, DF_STR_SIZE_512); + fi->hashes.blake2[DF_STR_SIZE_512] = '\0'; /* array has the additional nul byte space */ + + txt = sqlite3_column_text(st, 3); /* hashes.sha256 */ + memcpy(fi->hashes.sha256, txt, DF_STR_SIZE_256); + fi->hashes.sha256[DF_STR_SIZE_256] = '\0'; /* array has the additional nul byte space */ + + txt = sqlite3_column_text(st, 4); /* hashes.sha512 */ + memcpy(fi->hashes.sha512, txt, DF_STR_SIZE_512); + fi->hashes.sha512[DF_STR_SIZE_512] = '\0'; /* array has the additional nul byte space */ + + /* Ignore the item 5 from the query, it's the filesize we have in the stat struct */ + + fi->last_seen = (time_t) sqlite3_column_int64(st, 6); /* last_seen */ + + /* struct stat statbuf, copied from the raw blob; NOTE(review): the blob length is not checked against sizeof(fi->statbuf) */ + memcpy(&(fi->statbuf), sqlite3_column_blob(st, 7), (size_t)sqlite3_column_bytes(st, 7)); + + fi->id = id; + + sqlite3_clear_bindings(st); + sqlite3_reset(st); + + + return 0; +} /** * Print the database content to the given filedescriptor in a CSV form which can be processed with grep awk etc. 
@@ -1363,7 +1493,6 @@ int dbi_print_filenames(FILE *out) { #if 0 *select_fileinfo_by_id, - *select_fileinfo_by_id_resolved, *select_fileinfo_by_path_id, *select_fileinfo_by_filename_id, *select_fileinfo_by_path_filename_ids, diff --git a/src/file_processor.c b/src/file_processor.c index b2b4f85..0f4af9f 100644 --- a/src/file_processor.c +++ b/src/file_processor.c @@ -190,6 +190,7 @@ int process_file(struct df_fileinfo *info) { size_t bytes_read; struct df_md_components *ctx_pkg; bool error_in_loop = false; + struct df_fileinfo info_from_db = {0}; /* zeroed: dbi_fill_fileinfo() can return 0 having set only the id */ unsigned char md_val[EVP_MAX_MD_SIZE]; unsigned int md_len; @@ -218,6 +219,24 @@ int process_file(struct df_fileinfo *info) { return -1; } + /* Filesystem information is collected; check whether the DB has a corresponding entry. + * If so, compare size and times to see whether the file looks modified; if not, skip it. + * TODO: option_force needs to be implemented AND memory clean up */ + info_from_db.path = info->path; + info_from_db.name = info->name; + if ((dbi_fill_fileinfo(&info_from_db) == 0) && (info_from_db.id > 0) && + (info->statbuf.st_size == info_from_db.statbuf.st_size) && + (info->statbuf.st_mtim.tv_sec < info_from_db.last_seen) && + (info->statbuf.st_mtim.tv_sec == info_from_db.statbuf.st_mtim.tv_sec) && + (info->statbuf.st_mtim.tv_nsec == info_from_db.statbuf.st_mtim.tv_nsec)) { + if ( ! option_quiet ) { + LOGERR("Skip file '%s' file unchanged according to metadata\n", fullpath); + } + dbi_update_fileinfo_last_seen(info_from_db.id); + return 0; + } + + if ((ctx_pkg = init_md_components()) == NULL) { LOGERR("ERROR: Failed to initialize/create md contexts to be used with %s\n", fullpath); |
