diff options
Diffstat (limited to 'src/database_interaction.c')
| -rw-r--r-- | src/database_interaction.c | 230 |
1 files changed, 230 insertions, 0 deletions
diff --git a/src/database_interaction.c b/src/database_interaction.c index 37ce32c..e9e0b86 100644 --- a/src/database_interaction.c +++ b/src/database_interaction.c @@ -53,10 +53,13 @@ sqlite3 *dbconn = NULL; /* The statements will be wrapped via a function dbi_STATEMENTNAME() for the outside */ sqlite3_stmt *select_filename_by_id, *select_filename_by_name, + *select_filename_complete_table, *select_path_by_id, *select_path_by_pathname, + *select_hashes_all_ids, *select_hashes_by_id, *select_hashes_by_strings, + *select_hashes_complete_table, *select_fileinfo_by_id, *select_fileinfo_by_id_resolved, *select_fileinfo_by_path_id, @@ -64,6 +67,8 @@ sqlite3_stmt *select_filename_by_id, *select_fileinfo_by_path_filename_ids, *select_fileinfo_by_hash_path_filename_ids, *select_fileinfo_by_hash_id, + *select_fileinfo_by_hash_id_key_only, + *select_fileinfo_by_hash_id_resolved, *select_fileinfo_complete_table, *select_fileinfo_complete_table_resolved; @@ -77,10 +82,16 @@ sqlite3_stmt *update_fileinfo_last_seen, sqlite3_stmt *delete_fileinfo_by_id; +sqlite3_stmt *count_fileinfo_by_hash_id, + *count_fileinfo_by_filename, + *count_hashes, + *count_fileinfo; + /*=========== FUNCTIONS ===========*/ void create_tables(); int prepare_statements(); char *select_string_by_int(sqlite3_stmt *st, int64_t id); +int64_t call_count_query(sqlite3_stmt *st); /* Writing this block way too often */ #define DBCONN_CHECK(x) \ @@ -124,16 +135,21 @@ void dbi_close() { LOCAL_FINALIZE(select_filename_by_id); LOCAL_FINALIZE(select_filename_by_name); + LOCAL_FINALIZE(select_filename_complete_table); LOCAL_FINALIZE(select_path_by_id); LOCAL_FINALIZE(select_path_by_pathname); + LOCAL_FINALIZE(select_hashes_all_ids); LOCAL_FINALIZE(select_hashes_by_id); LOCAL_FINALIZE(select_hashes_by_strings); + LOCAL_FINALIZE(select_hashes_complete_table); LOCAL_FINALIZE(select_fileinfo_by_id); LOCAL_FINALIZE(select_fileinfo_by_id_resolved); LOCAL_FINALIZE(select_fileinfo_by_path_id); LOCAL_FINALIZE(select_fileinfo_by_filename_id); LOCAL_FINALIZE(select_fileinfo_by_path_filename_ids); LOCAL_FINALIZE(select_fileinfo_by_hash_id); + LOCAL_FINALIZE(select_fileinfo_by_hash_id_resolved); + LOCAL_FINALIZE(select_fileinfo_by_hash_id_key_only); LOCAL_FINALIZE(select_fileinfo_complete_table); LOCAL_FINALIZE(select_fileinfo_complete_table_resolved); @@ -147,6 +163,10 @@ void dbi_close() { LOCAL_FINALIZE(delete_fileinfo_by_id); + LOCAL_FINALIZE(count_fileinfo); + LOCAL_FINALIZE(count_fileinfo_by_hash_id); + LOCAL_FINALIZE(count_fileinfo_by_filename); + LOCAL_FINALIZE(count_hashes); #undef LOCAL_FINALIZE sqlite3_close(dbconn); @@ -208,6 +228,7 @@ int prepare_statements() { LOCAL_PREP_STMT("SELECT pathname FROM paths WHERE id = ? ;", &select_path_by_id); LOCAL_PREP_STMT("SELECT id FROM paths WHERE pathname = ? ;", &select_path_by_pathname); + LOCAL_PREP_STMT("SELECT hashes.id FROM hashes ;", &select_hashes_all_ids); LOCAL_PREP_STMT("SELECT blake2, sha256, sha512 FROM hashes WHERE id = ? ;", &select_hashes_by_id); LOCAL_PREP_STMT("SELECT id FROM hashes WHERE blake2 = ? AND sha256 = ? AND sha512 = ? ;", &select_hashes_by_strings); @@ -218,14 +239,19 @@ int prepare_statements() { LOCAL_PREP_STMT("SELECT * FROM fileinfo WHERE p_id = ? AND fn_id = ? ;", &select_fileinfo_by_path_filename_ids); LOCAL_PREP_STMT("SELECT * FROM fileinfo WHERE h_id = ? AND p_id = ? AND fn_id = ? ;", &select_fileinfo_by_hash_path_filename_ids); LOCAL_PREP_STMT("SELECT * FROM fileinfo WHERE h_id = ? ;", &select_fileinfo_by_hash_id); + LOCAL_PREP_STMT("SELECT id FROM fileinfo WHERE h_id = ? ;", &select_fileinfo_by_hash_id_key_only); /* TODO: so far the only query with JOINs or masking it in another way? * Many years since */ LOCAL_PREP_STMT("SELECT paths.pathname, filenames.name, hashes.blake2, hashes.sha256, hashes.sha512, fileinfo.size, fileinfo.last_seen, fileinfo.stat_struct FROM fileinfo INNER JOIN paths ON fileinfo.p_id = paths.id INNER JOIN filenames ON fileinfo.fn_id = filenames.id INNER JOIN hashes ON fileinfo.h_id = hashes.id WHERE fileinfo.id = ? ;", &select_fileinfo_by_id_resolved); LOCAL_PREP_STMT("SELECT paths.pathname, filenames.name, hashes.blake2, hashes.sha256, hashes.sha512, fileinfo.size, fileinfo.last_seen, fileinfo.stat_struct FROM fileinfo INNER JOIN paths ON fileinfo.p_id = paths.id INNER JOIN filenames ON fileinfo.fn_id = filenames.id INNER JOIN hashes ON fileinfo.h_id = hashes.id ;", &select_fileinfo_complete_table_resolved); + LOCAL_PREP_STMT("SELECT paths.pathname, filenames.name, hashes.blake2, hashes.sha256, hashes.sha512, fileinfo.size, fileinfo.last_seen, fileinfo.stat_struct FROM fileinfo INNER JOIN paths ON fileinfo.p_id = paths.id INNER JOIN filenames ON fileinfo.fn_id = filenames.id INNER JOIN hashes ON fileinfo.h_id = hashes.id WHERE fileinfo.h_id = ?;", &select_fileinfo_by_hash_id_resolved); LOCAL_PREP_STMT("SELECT p_id, fn_id, h_id, size, last_seen, stat_struct FROM fileinfo ;", &select_fileinfo_complete_table); + LOCAL_PREP_STMT("SELECT * FROM filenames;", &select_filename_complete_table); + LOCAL_PREP_STMT("SELECT * FROM hashes;", &select_hashes_complete_table); + /* INSERT */ LOCAL_PREP_STMT("INSERT INTO filenames (name) VALUES (?);", &insert_filename); LOCAL_PREP_STMT("INSERT INTO paths (pathname) VALUES (?);", &insert_pathname); @@ -241,6 +267,12 @@ int prepare_statements() { /* DELETE */ LOCAL_PREP_STMT("DELETE FROM fileinfo WHERE id = ? ;", &delete_fileinfo_by_id); + /* COUNT */ + LOCAL_PREP_STMT("SELECT COUNT(fileinfo.id) FROM fileinfo;", &count_fileinfo); + LOCAL_PREP_STMT("SELECT COUNT(fileinfo.h_id) FROM fileinfo WHERE fileinfo.h_id = ?;", &count_fileinfo_by_hash_id); + LOCAL_PREP_STMT("SELECT COUNT(fileinfo.fn_id) FROM fileinfo WHERE fileinfo.fn_id = ?;", &count_fileinfo_by_filename); + LOCAL_PREP_STMT("SELECT COUNT(hashes.id) FROM hashes ;", &count_hashes); + #undef LOCAL_PREP_STMT return 0; } @@ -935,6 +967,204 @@ int dbi_print_fileinfo_resolved(FILE *out) { return rc; } + +inline int64_t call_count_query(sqlite3_stmt *st) { + int64_t count; + int strc = 0; + + DBCONN_CHECK(-1); + + sqlite3_reset(st); + strc = sqlite3_step(st); + + if (strc != SQLITE_ROW && strc != SQLITE_DONE) { + LOGERR("ERROR: Failed step: %s\n", sqlite3_errmsg(dbconn)); + return -1; + } + + count = (int64_t) sqlite3_column_int64(st, 0); + + sqlite3_reset(st); + + return count; +} + +/** + * Get an array containing all ids from table hashes with the first field + * containing the complete length of the array including this field. + * + * @return NULL on failure + * an array on the heap which must be freed by the caller. + */ +int64_t *dbi_select_hashes_all_ids() { + int64_t *result = NULL; + int64_t rows = 0, id = 1, pos = 1; + int strc = 0; + sqlite3_stmt *st = select_hashes_all_ids; + + DBCONN_CHECK(NULL); + + rows = call_count_query(count_hashes); + + if (rows < 0) { return NULL; } + + rows++; +#pragma GCC diagnostic push +#pragma GCC diagnostic ignored "-Wsign-conversion" + if ((result = calloc(rows, sizeof(int64_t))) == NULL) { +#pragma GCC diagnostic pop + LOGERR("ERROR: Failed to allocate heap memory\n"); + return NULL; + } + + result[0] = rows; + + sqlite3_reset(st); + + do { + strc = sqlite3_step(st); + + if (strc == SQLITE_DONE) { + break; + } + + if (strc != SQLITE_ROW) { + LOGERR("ERROR: Failed step: %s\n", sqlite3_errmsg(dbconn)); + free(result); + result = NULL; + break; + } + + id = (int64_t) sqlite3_column_int64(st, 0); /* hashes.id */ + result[pos++] = id; + DBGTRC("DEBUG: Found id %ld\n", id); + + } while (strc == SQLITE_ROW); + + sqlite3_reset(st); + + DBGTRC("DEBUG: rows %ld | pos %ld\n", rows, pos); + + return result; +} + + +/** + * Iterate over the stored hashes, for those associated with more than + * one row in fileinfo the information from the latter will be printed. + * + * @param out filestream for output, if NULL stdout is used + * + * @return 0 on success + * <0 on failure + */ +int dbi_print_identical_hashes(FILE *out) { + int rc = 0; + int64_t hid, count, id, i; + int64_t *hid_array; + int strc_fi = 0, strc_count = 0; + FILE *fd = out; + const unsigned char *txt = NULL; + sqlite3_stmt *stfi = select_fileinfo_by_hash_id_resolved, + *stcount = count_fileinfo_by_hash_id; + + DBCONN_CHECK(-1); + + if (fd == NULL) { fd = stdout; } + + /* SQLite only supports one query at a time per connection, therefore the + * segmented approach. Query for all hashes. Iterating over the array and + * query to count the usage of each id, query in case there's more than a + * single association. A second connection is possible, but would require + * another set of query preparation and other surround stuff. + */ + hid_array = dbi_select_hashes_all_ids(); + if (hid_array == NULL) { + return -1; + } + + for (i=1; i<hid_array[0]; i++) { + sqlite3_clear_bindings(stcount); + sqlite3_reset(stcount); + + /* prevent human errors */ + hid = hid_array[i]; + + if (sqlite3_bind_int64(stcount, 1, hid) != SQLITE_OK) { + LOGERR("ERROR: Failed to bind hashes.id to prepared statement for count: %s\n", sqlite3_errmsg(dbconn)); + rc = -1; + break; + } + + strc_count = sqlite3_step(stcount); + if (strc_count == SQLITE_DONE) { /* Not found */ + continue; + } else if (strc_count != SQLITE_ROW) { + LOGERR("ERROR: Failed step: %s\n", sqlite3_errmsg(dbconn)); + rc = -1; + break; + } + + count = (int64_t) sqlite3_column_int64(stcount, 0); + DBGTRC("DEBUG: count results for hash id %ld: %ld\n", hid, count); + + if (count>1) { + sqlite3_reset(stfi); + sqlite3_clear_bindings(stfi); + + if (sqlite3_bind_int64(stfi, 1, hid) != SQLITE_OK) { + LOGERR("ERROR: Failed to bind hashes.id to prepared statement for count: %s\n", sqlite3_errmsg(dbconn)); + free(hid_array); + return -1; + } + + do { + strc_fi = sqlite3_step(stfi); + + if (strc_fi == SQLITE_DONE) { + DBGTRC("DEBUG: finished for hid %ld\n", hid); + break; + } + + if (strc_fi != SQLITE_ROW) { + LOGERR("ERROR: Failed step to get fileinfo content: %s\n", sqlite3_errmsg(dbconn)); + free(hid_array); + return -1; /* drop-it */ + } + + txt = sqlite3_column_text(stfi, 0); /* paths.pathname */ + fprintf(fd, "%s/", txt); + txt = sqlite3_column_text(stfi, 1); /* filenames.name */ + fprintf(fd, "%s;", txt); + txt = sqlite3_column_text(stfi, 2); /* hashes.blake2 */ + fprintf(fd, "%s;", txt); + txt = sqlite3_column_text(stfi, 3); /* hashes.sha256 */ + fprintf(fd, "%s;", txt); + txt = sqlite3_column_text(stfi, 4); /* hashes.sha512 */ + fprintf(fd, "%s;", txt); + id = (int64_t) sqlite3_column_int64(stfi, 5); /* fileinfo.size */ + fprintf(fd, "%ld\n", id); + } while (strc_fi == SQLITE_ROW); + } + sqlite3_reset(stfi); + sqlite3_clear_bindings(stfi); + } + + free(hid_array); + + sqlite3_clear_bindings(stfi); + sqlite3_clear_bindings(stcount); + sqlite3_reset(stfi); + sqlite3_reset(stcount); + + return rc; +} + + +int dbi_print_identical_filenames(FILE *out); + + + #if 0 *select_fileinfo_by_id, *select_fileinfo_by_id_resolved, |
