aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--include/database_interaction.h3
-rw-r--r--include/file_processor.h4
-rw-r--r--src/database_interaction.c131
-rw-r--r--src/file_processor.c19
4 files changed, 156 insertions, 1 deletions
diff --git a/include/database_interaction.h b/include/database_interaction.h
index 5ff6f66..c236cf2 100644
--- a/include/database_interaction.h
+++ b/include/database_interaction.h
@@ -29,8 +29,11 @@ int dbi_insert_hashes(const char *blake2, const char *sha256, const char *sha512
int64_t dbi_select_fileinfo_by_hash_path_filename_ids(int64_t hash_id, int64_t path_id, int64_t filename_id);
int64_t dbi_select_fileinfo_by_path_filename_ids(int64_t pname_id, int64_t fname_id);
+int dbi_select_fileinfo_by_id_resolved(int64_t id, struct df_fileinfo *fi);
int dbi_insert_fileinfo(struct df_fileinfo *fi);
+int dbi_fill_fileinfo(struct df_fileinfo *fi);
+
int dbi_update_fileinfo_last_seen(int64_t id);
int dbi_update_fileinfo_complete(struct df_fileinfo *fi, int64_t existing_id);
diff --git a/include/file_processor.h b/include/file_processor.h
index 06b6d1d..e8f66f2 100644
--- a/include/file_processor.h
+++ b/include/file_processor.h
@@ -7,6 +7,8 @@
#ifndef FILE_PROCESSOR_H
#define FILE_PROCESSOR_H
+#include <stdint.h>
+#include <time.h>
#include <sys/stat.h>
#define DF_BYTE_SIZE_256 32
@@ -44,7 +46,9 @@ struct df_fileinfo {
unsigned char sha512[DF_BYTE_SIZE_512]; /**< The SHA512 hash in binary form. */
#endif
struct df_hashstrings hashes; /**< The BLAKE2-512, SHA256 and SHA512 hash of the file */
+ time_t last_seen; /**< time_t value when the file was last seen, filled by the database */
struct stat statbuf; /**< Result of lstat() call. Symlinks are to be ignored and filtered out earlier. */
+ int64_t id; /**< to be set only when filled by the database */
};
diff --git a/src/database_interaction.c b/src/database_interaction.c
index 5188a35..9e29c17 100644
--- a/src/database_interaction.c
+++ b/src/database_interaction.c
@@ -923,6 +923,136 @@ int dbi_insert_fileinfo(struct df_fileinfo *fi) {
return rc;
};
+/**
+ * Fill the given struct with the database content stored for fi->path / fi->name, if available.
+ *
+ * This can be used to speed up a rescan: if the stored stat data matches the file on disk and
+ * the file's mtime lies before last_seen, the hashes do not have to be recomputed.
+ *
+ * @param fi  fileinfo with path and name set; the remaining fields are filled from the database
+ *            by dbi_select_fileinfo_by_id_resolved() when an entry exists
+ * @return 0 on success and also when no entry exists (fi->id and fi->last_seen are then 0),
+ *         -1 on error
+ */
+int dbi_fill_fileinfo(struct df_fileinfo *fi) {
+	int64_t fname_id, pname_id, existing_entry = 0;
+
+	DBCONN_CHECK(-1);
+
+	if (fi == NULL) {
+		LOGERR("ERROR: No fileinfo given.\n");
+		return -1;
+	}
+
+	/* Both lookups below need a string, so refuse a half-filled struct early */
+	if (fi->name == NULL || fi->path == NULL) {
+		LOGERR("ERROR: Path and filename are required.\n");
+		return -1;
+	}
+
+	fname_id = dbi_select_filename_by_name(fi->name);
+	pname_id = dbi_select_path_by_pathname(fi->path);
+
+	/* Any problems with the selects? */
+	if (fname_id < 0 || pname_id < 0) {
+		DBGTRC("ERROR: abort due to previous error.\n");
+		return -1;
+	}
+
+	/*
+	 * No matching entry in the table: report success with id == 0.
+	 * An id of 0 is treated like "no entry" as well, because
+	 * dbi_select_fileinfo_by_id_resolved() rejects every id <= 0.
+	 * NOTE(review): presumably the lookup returns 0 for "not found" - confirm.
+	 * last_seen is cleared too, so a caller that compares against it never
+	 * sees a stale value for a file the database does not know.
+	 */
+	existing_entry = dbi_select_fileinfo_by_path_filename_ids(pname_id, fname_id);
+	if (existing_entry <= 0) {
+		fi->id = 0;
+		fi->last_seen = 0;
+		return 0;
+	}
+
+	return dbi_select_fileinfo_by_id_resolved(existing_entry, fi);
+}
+
+
+
+/**
+ * Duplicate a text column of the current result row into a freshly allocated,
+ * NUL-terminated buffer. The caller owns the returned buffer and has to free() it.
+ *
+ * @return the copy, or NULL if the column is NULL or the allocation failed
+ */
+static char *dbi_dup_column_text(sqlite3_stmt *st, int col) {
+	/* Fetch the text first, then its size, so the byte count refers to the text form */
+	const unsigned char *txt = sqlite3_column_text(st, col);
+	size_t len = (size_t)sqlite3_column_bytes(st, col);
+	char *copy;
+
+	if (txt == NULL) {
+		LOGERR("ERROR: Unexpected NULL value in column %d\n", col);
+		return NULL;
+	}
+
+	if ((copy = calloc(len + 1, sizeof(char))) == NULL) {
+		LOGERR("ERROR: Failed to allocate a few bytes memory\n");
+		return NULL;
+	}
+	memcpy(copy, txt, len);
+
+	return copy;
+}
+
+/**
+ * Copy a hash string column into dst, which must provide room for hexlen + 1 bytes.
+ *
+ * At most hexlen bytes are copied and never more than the column actually holds,
+ * so a short, empty or NULL column yields a correspondingly short (or empty) string
+ * instead of reading past the end of the SQLite buffer.
+ */
+static void dbi_copy_hash_column(sqlite3_stmt *st, int col, void *dst, size_t hexlen) {
+	const unsigned char *txt = sqlite3_column_text(st, col);
+	size_t len = (size_t)sqlite3_column_bytes(st, col);
+
+	if (txt == NULL) {
+		len = 0;
+	}
+	if (len > hexlen) {
+		len = hexlen;
+	}
+	if (len > 0) {
+		memcpy(dst, txt, len);
+	}
+	((char *)dst)[len] = '\0';
+}
+
+/**
+ * Load the fileinfo row with the given id, with path, filename and hashes resolved to
+ * their values, into the given struct.
+ *
+ * Result columns used: 0 paths.pathname, 1 filenames.name, 2 hashes.blake2, 3 hashes.sha256,
+ * 4 hashes.sha512, 6 last_seen, 7 the struct stat blob. Column 5 (file size) is ignored
+ * because the size is part of the stat struct.
+ *
+ * fi->path and fi->name are only filled when they are NULL on entry; the strings are then
+ * allocated here and owned by the caller. On failure fi is left unmodified.
+ *
+ * @param id  id of the fileinfo row, must be > 0
+ * @param fi  struct to fill
+ * @return 0 on success, -1 on error or if no row with that id exists
+ */
+int dbi_select_fileinfo_by_id_resolved(int64_t id, struct df_fileinfo *fi) {
+	int rc = -1;
+	int strc = 0;
+	int blob_len = 0;
+	char *new_path = NULL;
+	char *new_name = NULL;
+	const void *blob = NULL;
+	sqlite3_stmt *st = select_fileinfo_by_id_resolved;
+
+	DBCONN_CHECK(-1);
+
+	if (id <= 0) {
+		LOGERR("ERROR: No id given.\n");
+		return -1;
+	}
+
+	if (fi == NULL) {
+		LOGERR("ERROR: No fileinfo struct given.\n");
+		return -1;
+	}
+
+	sqlite3_clear_bindings(st);
+	sqlite3_reset(st);
+
+	if (sqlite3_bind_int64(st, 1, id) != SQLITE_OK) {
+		LOGERR("ERROR: Failed to bind id to prepared statement: %s\n", sqlite3_errmsg(dbconn));
+		goto out;
+	}
+
+	strc = sqlite3_step(st);
+
+	/* No row with that id */
+	if (strc == SQLITE_DONE) {
+		goto out;
+	}
+
+	if (strc != SQLITE_ROW) {
+		LOGERR("ERROR: Failed step: %s\n", sqlite3_errmsg(dbconn));
+		goto out;
+	}
+
+	/* Everything that can fail is done first, fi is only touched afterwards */
+	if (fi->path == NULL && (new_path = dbi_dup_column_text(st, 0)) == NULL) { /* paths.pathname */
+		goto out;
+	}
+
+	if (fi->name == NULL && (new_name = dbi_dup_column_text(st, 1)) == NULL) { /* filenames.name */
+		goto out;
+	}
+
+	/* struct stat statbuf: only accept a blob that has exactly the size of the struct */
+	blob = sqlite3_column_blob(st, 7);
+	blob_len = sqlite3_column_bytes(st, 7);
+	if (blob == NULL || (size_t)blob_len != sizeof(fi->statbuf)) {
+		LOGERR("ERROR: Stored stat data has %d bytes, expected %zu\n", blob_len, sizeof(fi->statbuf));
+		goto out;
+	}
+	memcpy(&(fi->statbuf), blob, sizeof(fi->statbuf));
+
+	/* The hash arrays have the additional nul byte space */
+	dbi_copy_hash_column(st, 2, fi->hashes.blake2, DF_STR_SIZE_512); /* hashes.blake2 */
+	dbi_copy_hash_column(st, 3, fi->hashes.sha256, DF_STR_SIZE_256); /* hashes.sha256 */
+	dbi_copy_hash_column(st, 4, fi->hashes.sha512, DF_STR_SIZE_512); /* hashes.sha512 */
+
+	/* Ignore the item 5 from the query, it's the filesize we have in the stat struct */
+
+	fi->last_seen = (time_t) sqlite3_column_int64(st, 6); /* last_seen */
+
+	if (new_path != NULL) {
+		fi->path = new_path;
+	}
+	if (new_name != NULL) {
+		fi->name = new_name;
+	}
+	fi->id = id;
+	rc = 0;
+
+out:
+	if (rc != 0) {
+		free(new_path);
+		free(new_name);
+	}
+
+	/* Always leave the prepared statement ready for the next use */
+	sqlite3_clear_bindings(st);
+	sqlite3_reset(st);
+
+	return rc;
+}
/**
* Print the database content to the given filedescriptor in a CSV form which can be processed with grep awk etc.
@@ -1363,7 +1493,6 @@ int dbi_print_filenames(FILE *out) {
#if 0
*select_fileinfo_by_id,
- *select_fileinfo_by_id_resolved,
*select_fileinfo_by_path_id,
*select_fileinfo_by_filename_id,
*select_fileinfo_by_path_filename_ids,
diff --git a/src/file_processor.c b/src/file_processor.c
index b2b4f85..0f4af9f 100644
--- a/src/file_processor.c
+++ b/src/file_processor.c
@@ -190,6 +190,7 @@ int process_file(struct df_fileinfo *info) {
size_t bytes_read;
struct df_md_components *ctx_pkg;
bool error_in_loop = false;
+ struct df_fileinfo info_from_db;
unsigned char md_val[EVP_MAX_MD_SIZE];
unsigned int md_len;
@@ -218,6 +219,24 @@ int process_file(struct df_fileinfo *info) {
return -1;
}
+	/* Filesystem information has been collected. Check whether the DB has a corresponding entry;
+	 * if so, compare size and modification time to decide whether the file looks modified.
+	 * If it looks unmodified, skip it.
+	 * TODO: option_force needs to be implemented AND memory clean up */
+ info_from_db.path = info->path;
+ info_from_db.name = info->name;
+ if ((dbi_fill_fileinfo(&info_from_db) == 0) &&
+ (info->statbuf.st_size == info_from_db.statbuf.st_size) &&
+ (info->statbuf.st_mtim.tv_sec < info_from_db.last_seen) &&
+ (info->statbuf.st_mtim.tv_sec == info_from_db.statbuf.st_mtim.tv_sec) &&
+ (info->statbuf.st_mtim.tv_nsec == info_from_db.statbuf.st_mtim.tv_nsec)) {
+ if ( ! option_quiet ) {
+ LOGERR("Skip file '%s' file unchanged according to metadata\n", fullpath);
+ }
+ dbi_update_fileinfo_last_seen(info_from_db.id);
+ return 0;
+ }
+
+
if ((ctx_pkg = init_md_components()) == NULL) {
LOGERR("ERROR: Failed to initialize/create md contexts to be used with %s\n",
fullpath);