aboutsummaryrefslogtreecommitdiff
path: root/src/database_interaction.c
diff options
context:
space:
mode:
Diffstat (limited to 'src/database_interaction.c')
-rw-r--r--src/database_interaction.c230
1 files changed, 230 insertions, 0 deletions
diff --git a/src/database_interaction.c b/src/database_interaction.c
index 37ce32c..e9e0b86 100644
--- a/src/database_interaction.c
+++ b/src/database_interaction.c
@@ -53,10 +53,13 @@ sqlite3 *dbconn = NULL;
/* The statements will be wrapped via a function dbi_STATEMENTNAME() for the outside */
sqlite3_stmt *select_filename_by_id,
*select_filename_by_name,
+ *select_filename_complete_table,
*select_path_by_id,
*select_path_by_pathname,
+ *select_hashes_all_ids,
*select_hashes_by_id,
*select_hashes_by_strings,
+ *select_hashes_complete_table,
*select_fileinfo_by_id,
*select_fileinfo_by_id_resolved,
*select_fileinfo_by_path_id,
@@ -64,6 +67,8 @@ sqlite3_stmt *select_filename_by_id,
*select_fileinfo_by_path_filename_ids,
*select_fileinfo_by_hash_path_filename_ids,
*select_fileinfo_by_hash_id,
+ *select_fileinfo_by_hash_id_key_only,
+ *select_fileinfo_by_hash_id_resolved,
*select_fileinfo_complete_table,
*select_fileinfo_complete_table_resolved;
@@ -77,10 +82,16 @@ sqlite3_stmt *update_fileinfo_last_seen,
sqlite3_stmt *delete_fileinfo_by_id;
+sqlite3_stmt *count_fileinfo_by_hash_id,
+ *count_fileinfo_by_filename,
+ *count_hashes,
+ *count_fileinfo;
+
/*=========== FUNCTIONS ===========*/
void create_tables();
int prepare_statements();
char *select_string_by_int(sqlite3_stmt *st, int64_t id);
+int64_t call_count_query(sqlite3_stmt *st);
/* Writing this block way too often */
#define DBCONN_CHECK(x) \
@@ -124,16 +135,21 @@ void dbi_close() {
LOCAL_FINALIZE(select_filename_by_id);
LOCAL_FINALIZE(select_filename_by_name);
+ LOCAL_FINALIZE(select_filename_complete_table);
LOCAL_FINALIZE(select_path_by_id);
LOCAL_FINALIZE(select_path_by_pathname);
+ LOCAL_FINALIZE(select_hashes_all_ids);
LOCAL_FINALIZE(select_hashes_by_id);
LOCAL_FINALIZE(select_hashes_by_strings);
+ LOCAL_FINALIZE(select_hashes_complete_table);
LOCAL_FINALIZE(select_fileinfo_by_id);
LOCAL_FINALIZE(select_fileinfo_by_id_resolved);
LOCAL_FINALIZE(select_fileinfo_by_path_id);
LOCAL_FINALIZE(select_fileinfo_by_filename_id);
LOCAL_FINALIZE(select_fileinfo_by_path_filename_ids);
LOCAL_FINALIZE(select_fileinfo_by_hash_id);
+ LOCAL_FINALIZE(select_fileinfo_by_hash_id_resolved);
+ LOCAL_FINALIZE(select_fileinfo_by_hash_id_key_only);
LOCAL_FINALIZE(select_fileinfo_complete_table);
LOCAL_FINALIZE(select_fileinfo_complete_table_resolved);
@@ -147,6 +163,10 @@ void dbi_close() {
LOCAL_FINALIZE(delete_fileinfo_by_id);
+ LOCAL_FINALIZE(count_fileinfo);
+ LOCAL_FINALIZE(count_fileinfo_by_hash_id);
+ LOCAL_FINALIZE(count_fileinfo_by_filename);
+ LOCAL_FINALIZE(count_hashes);
#undef LOCAL_FINALIZE
sqlite3_close(dbconn);
@@ -208,6 +228,7 @@ int prepare_statements() {
LOCAL_PREP_STMT("SELECT pathname FROM paths WHERE id = ? ;", &select_path_by_id);
LOCAL_PREP_STMT("SELECT id FROM paths WHERE pathname = ? ;", &select_path_by_pathname);
+ LOCAL_PREP_STMT("SELECT hashes.id FROM hashes ;", &select_hashes_all_ids);
LOCAL_PREP_STMT("SELECT blake2, sha256, sha512 FROM hashes WHERE id = ? ;", &select_hashes_by_id);
LOCAL_PREP_STMT("SELECT id FROM hashes WHERE blake2 = ? AND sha256 = ? AND sha512 = ? ;", &select_hashes_by_strings);
@@ -218,14 +239,19 @@ int prepare_statements() {
LOCAL_PREP_STMT("SELECT * FROM fileinfo WHERE p_id = ? AND fn_id = ? ;", &select_fileinfo_by_path_filename_ids);
LOCAL_PREP_STMT("SELECT * FROM fileinfo WHERE h_id = ? AND p_id = ? AND fn_id = ? ;", &select_fileinfo_by_hash_path_filename_ids);
LOCAL_PREP_STMT("SELECT * FROM fileinfo WHERE h_id = ? ;", &select_fileinfo_by_hash_id);
+ LOCAL_PREP_STMT("SELECT id FROM fileinfo WHERE h_id = ? ;", &select_fileinfo_by_hash_id_key_only);
/* TODO: so far the only query with JOINs or masking it in another way?
* Many years since
*/
LOCAL_PREP_STMT("SELECT paths.pathname, filenames.name, hashes.blake2, hashes.sha256, hashes.sha512, fileinfo.size, fileinfo.last_seen, fileinfo.stat_struct FROM fileinfo INNER JOIN paths ON fileinfo.p_id = paths.id INNER JOIN filenames ON fileinfo.fn_id = filenames.id INNER JOIN hashes ON fileinfo.h_id = hashes.id WHERE fileinfo.id = ? ;", &select_fileinfo_by_id_resolved);
LOCAL_PREP_STMT("SELECT paths.pathname, filenames.name, hashes.blake2, hashes.sha256, hashes.sha512, fileinfo.size, fileinfo.last_seen, fileinfo.stat_struct FROM fileinfo INNER JOIN paths ON fileinfo.p_id = paths.id INNER JOIN filenames ON fileinfo.fn_id = filenames.id INNER JOIN hashes ON fileinfo.h_id = hashes.id ;", &select_fileinfo_complete_table_resolved);
+ LOCAL_PREP_STMT("SELECT paths.pathname, filenames.name, hashes.blake2, hashes.sha256, hashes.sha512, fileinfo.size, fileinfo.last_seen, fileinfo.stat_struct FROM fileinfo INNER JOIN paths ON fileinfo.p_id = paths.id INNER JOIN filenames ON fileinfo.fn_id = filenames.id INNER JOIN hashes ON fileinfo.h_id = hashes.id WHERE fileinfo.h_id = ?;", &select_fileinfo_by_hash_id_resolved);
LOCAL_PREP_STMT("SELECT p_id, fn_id, h_id, size, last_seen, stat_struct FROM fileinfo ;", &select_fileinfo_complete_table);
+ LOCAL_PREP_STMT("SELECT * FROM filenames;", &select_filename_complete_table);
+ LOCAL_PREP_STMT("SELECT * FROM hashes;", &select_hashes_complete_table);
+
/* INSERT */
LOCAL_PREP_STMT("INSERT INTO filenames (name) VALUES (?);", &insert_filename);
LOCAL_PREP_STMT("INSERT INTO paths (pathname) VALUES (?);", &insert_pathname);
@@ -241,6 +267,12 @@ int prepare_statements() {
/* DELETE */
LOCAL_PREP_STMT("DELETE FROM fileinfo WHERE id = ? ;", &delete_fileinfo_by_id);
+ /* COUNT */
+ LOCAL_PREP_STMT("SELECT COUNT(fileinfo.id) FROM fileinfo;", &count_fileinfo);
+ LOCAL_PREP_STMT("SELECT COUNT(fileinfo.h_id) FROM fileinfo WHERE fileinfo.h_id = ?;", &count_fileinfo_by_hash_id);
+ LOCAL_PREP_STMT("SELECT COUNT(fileinfo.fn_id) FROM fileinfo WHERE fileinfo.fn_id = ?;", &count_fileinfo_by_filename);
+ LOCAL_PREP_STMT("SELECT COUNT(hashes.id) FROM hashes ;", &count_hashes);
+
#undef LOCAL_PREP_STMT
return 0;
}
@@ -935,6 +967,204 @@ int dbi_print_fileinfo_resolved(FILE *out) {
return rc;
}
+
+inline int64_t call_count_query(sqlite3_stmt *st) {
+ int64_t count;
+ int strc = 0;
+
+ DBCONN_CHECK(-1);
+
+ sqlite3_reset(st);
+ strc = sqlite3_step(st);
+
+ if (strc != SQLITE_ROW && strc != SQLITE_DONE) {
+ LOGERR("ERROR: Failed step: %s\n", sqlite3_errmsg(dbconn));
+ return -1;
+ }
+
+ count = (int64_t) sqlite3_column_int64(st, 0);
+
+ sqlite3_reset(st);
+
+ return count;
+}
+
+/**
+ * Get an array containing all ids from table hashes with the first field
+ * containing the complete length of the array including this field.
+ *
+ * @return NULL on failure
+ * an array on the heap which must be freed by the caller.
+ */
+int64_t *dbi_select_hashes_all_ids() {
+ int64_t *result = NULL;
+ int64_t rows = 0, id = 1, pos = 1;
+ int strc = 0;
+ sqlite3_stmt *st = select_hashes_all_ids;
+
+ DBCONN_CHECK(NULL);
+
+ rows = call_count_query(count_hashes);
+
+ if (rows < 0) { return NULL; }
+
+ rows++;
+#pragma GCC diagnostic push
+#pragma GCC diagnostic ignored "-Wsign-conversion"
+ if ((result = calloc(rows, sizeof(int64_t))) == NULL) {
+#pragma GCC diagnostic pop
+ LOGERR("ERROR: Failed to allocate heap memory\n");
+ return NULL;
+ }
+
+ result[0] = rows;
+
+ sqlite3_reset(st);
+
+ do {
+ strc = sqlite3_step(st);
+
+ if (strc == SQLITE_DONE) {
+ break;
+ }
+
+ if (strc != SQLITE_ROW) {
+ LOGERR("ERROR: Failed step: %s\n", sqlite3_errmsg(dbconn));
+ free(result);
+ result = NULL;
+ break;
+ }
+
+ id = (int64_t) sqlite3_column_int64(st, 0); /* hashes.id */
+ result[pos++] = id;
+ DBGTRC("DEBUG: Found id %ld\n", id);
+
+ } while (strc == SQLITE_ROW);
+
+ sqlite3_reset(st);
+
+ DBGTRC("DEBUG: rows %ld | pos %ld\n", rows, pos);
+
+ return result;
+}
+
+
+/**
+ * Iterate over the stored hashes, for those associated with more than
+ * one row in fileinfo the information from the latter will be printed.
+ *
+ * @param out filestream for output, if NULL stdout is used
+ *
+ * @return 0 on success
+ * <0 on failure
+ */
+int dbi_print_identical_hashes(FILE *out) {
+ int rc = 0;
+ int64_t hid, count, id, i;
+ int64_t *hid_array;
+ int strc_fi = 0, strc_count = 0;
+ FILE *fd = out;
+ const unsigned char *txt = NULL;
+ sqlite3_stmt *stfi = select_fileinfo_by_hash_id_resolved,
+ *stcount = count_fileinfo_by_hash_id;
+
+ DBCONN_CHECK(-1);
+
+ if (fd == NULL) { fd = stdout; }
+
+ /* SQLite only supports one query at a time per connection, therefore the
+ * segmented approach. Query for all hashes. Iterating over the array and
+ * query to count the usage of each id, query in case there's more than a
+ * single association. A second connection is possible, but would require
+ * another set of query preparation and other surround stuff.
+ */
+ hid_array = dbi_select_hashes_all_ids();
+ if (hid_array == NULL) {
+ return -1;
+ }
+
+ for (i=1; i<hid_array[0]; i++) {
+ sqlite3_clear_bindings(stcount);
+ sqlite3_reset(stcount);
+
+ /* prevent human errors */
+ hid = hid_array[i];
+
+ if (sqlite3_bind_int64(stcount, 1, hid) != SQLITE_OK) {
+ LOGERR("ERROR: Failed to bind hashes.id to prepared statement for count: %s\n", sqlite3_errmsg(dbconn));
+ rc = -1;
+ break;
+ }
+
+ strc_count = sqlite3_step(stcount);
+ if (strc_count == SQLITE_DONE) { /* Not found */
+ continue;
+ } else if (strc_count != SQLITE_ROW) {
+ LOGERR("ERROR: Failed step: %s\n", sqlite3_errmsg(dbconn));
+ rc = -1;
+ break;
+ }
+
+ count = (int64_t) sqlite3_column_int64(stcount, 0);
+ DBGTRC("DEBUG: count results for hash id %ld: %ld\n", hid, count);
+
+ if (count>1) {
+ sqlite3_reset(stfi);
+ sqlite3_clear_bindings(stfi);
+
+ if (sqlite3_bind_int64(stfi, 1, hid) != SQLITE_OK) {
+ LOGERR("ERROR: Failed to bind hashes.id to prepared statement for count: %s\n", sqlite3_errmsg(dbconn));
+ free(hid_array);
+ return -1;
+ }
+
+ do {
+ strc_fi = sqlite3_step(stfi);
+
+ if (strc_fi == SQLITE_DONE) {
+ DBGTRC("DEBUG: finished for hid %ld\n", hid);
+ break;
+ }
+
+ if (strc_fi != SQLITE_ROW) {
+ LOGERR("ERROR: Failed step to get fileinfo content: %s\n", sqlite3_errmsg(dbconn));
+ free(hid_array);
+ return -1; /* drop-it */
+ }
+
+ txt = sqlite3_column_text(stfi, 0); /* paths.pathname */
+ fprintf(fd, "%s/", txt);
+ txt = sqlite3_column_text(stfi, 1); /* filenames.name */
+ fprintf(fd, "%s;", txt);
+ txt = sqlite3_column_text(stfi, 2); /* hashes.blake2 */
+ fprintf(fd, "%s;", txt);
+ txt = sqlite3_column_text(stfi, 3); /* hashes.sha256 */
+ fprintf(fd, "%s;", txt);
+ txt = sqlite3_column_text(stfi, 4); /* hashes.sha512 */
+ fprintf(fd, "%s;", txt);
+ id = (int64_t) sqlite3_column_int64(stfi, 5); /* fileinfo.size */
+ fprintf(fd, "%ld\n", id);
+ } while (strc_fi == SQLITE_ROW);
+ }
+ sqlite3_reset(stfi);
+ sqlite3_clear_bindings(stfi);
+ }
+
+ free(hid_array);
+
+ sqlite3_clear_bindings(stfi);
+ sqlite3_clear_bindings(stcount);
+ sqlite3_reset(stfi);
+ sqlite3_reset(stcount);
+
+ return rc;
+}
+
+
+int dbi_print_identical_filenames(FILE *out);
+
+
+
#if 0
*select_fileinfo_by_id,
*select_fileinfo_by_id_resolved,