aboutsummaryrefslogtreecommitdiff
path: root/src/database_interaction.c
diff options
context:
space:
mode:
authorThorsten Töpper <atsutane@freethoughts.de>2026-02-24 23:35:18 +0100
committerThorsten Töpper <atsutane@freethoughts.de>2026-02-24 23:35:18 +0100
commit9230578c5bb4ee0e3e08ce432a7f07b9b967c0b8 (patch)
tree22fcfb8081771c806e579c878f6b352e0468593c /src/database_interaction.c
parenta3b5ddabd37cce398e84f0e98fa9c3318ee8df86 (diff)
downloadduplicate_finder-9230578c5bb4ee0e3e08ce432a7f07b9b967c0b8.tar.gz
duplicate_finder-9230578c5bb4ee0e3e08ce432a7f07b9b967c0b8.tar.bz2
Also print duplicate filenames
Diffstat (limited to 'src/database_interaction.c')
-rw-r--r--src/database_interaction.c132
1 files changed, 125 insertions, 7 deletions
diff --git a/src/database_interaction.c b/src/database_interaction.c
index e9e0b86..3acc81c 100644
--- a/src/database_interaction.c
+++ b/src/database_interaction.c
@@ -53,6 +53,7 @@ sqlite3 *dbconn = NULL;
/* The statements will be wrapped via a function dbi_STATEMENTNAME() for the outside */
sqlite3_stmt *select_filename_by_id,
*select_filename_by_name,
+ *select_filename_all_ids,
*select_filename_complete_table,
*select_path_by_id,
*select_path_by_pathname,
@@ -64,6 +65,7 @@ sqlite3_stmt *select_filename_by_id,
*select_fileinfo_by_id_resolved,
*select_fileinfo_by_path_id,
*select_fileinfo_by_filename_id,
+ *select_fileinfo_by_filename_id_resolved,
*select_fileinfo_by_path_filename_ids,
*select_fileinfo_by_hash_path_filename_ids,
*select_fileinfo_by_hash_id,
@@ -84,6 +86,7 @@ sqlite3_stmt *delete_fileinfo_by_id;
sqlite3_stmt *count_fileinfo_by_hash_id,
*count_fileinfo_by_filename,
+ *count_filenames,
*count_hashes,
*count_fileinfo;
@@ -92,6 +95,7 @@ void create_tables();
int prepare_statements();
char *select_string_by_int(sqlite3_stmt *st, int64_t id);
int64_t call_count_query(sqlite3_stmt *st);
+int64_t *call_select_all_ids(sqlite3_stmt *all_ids, sqlite3_stmt *count_query);
/* Writing this block way too often */
#define DBCONN_CHECK(x) \
@@ -133,6 +137,7 @@ void dbi_close() {
DBCONN_CHECK();
+ LOCAL_FINALIZE(select_filename_all_ids);
LOCAL_FINALIZE(select_filename_by_id);
LOCAL_FINALIZE(select_filename_by_name);
LOCAL_FINALIZE(select_filename_complete_table);
@@ -146,6 +151,7 @@ void dbi_close() {
LOCAL_FINALIZE(select_fileinfo_by_id_resolved);
LOCAL_FINALIZE(select_fileinfo_by_path_id);
LOCAL_FINALIZE(select_fileinfo_by_filename_id);
+ LOCAL_FINALIZE(select_fileinfo_by_filename_id_resolved);
LOCAL_FINALIZE(select_fileinfo_by_path_filename_ids);
LOCAL_FINALIZE(select_fileinfo_by_hash_id);
LOCAL_FINALIZE(select_fileinfo_by_hash_id_resolved);
@@ -166,6 +172,7 @@ void dbi_close() {
LOCAL_FINALIZE(count_fileinfo);
LOCAL_FINALIZE(count_fileinfo_by_hash_id);
LOCAL_FINALIZE(count_fileinfo_by_filename);
+ LOCAL_FINALIZE(count_filenames);
LOCAL_FINALIZE(count_hashes);
#undef LOCAL_FINALIZE
@@ -224,6 +231,8 @@ int prepare_statements() {
/* SELECT */
LOCAL_PREP_STMT("SELECT name FROM filenames WHERE id = ? ;", &select_filename_by_id);
LOCAL_PREP_STMT("SELECT id FROM filenames WHERE name = ? ;", &select_filename_by_name);
+ LOCAL_PREP_STMT("SELECT id FROM filenames ;", &select_filename_all_ids);
+ LOCAL_PREP_STMT("SELECT * FROM filenames;", &select_filename_complete_table);
LOCAL_PREP_STMT("SELECT pathname FROM paths WHERE id = ? ;", &select_path_by_id);
LOCAL_PREP_STMT("SELECT id FROM paths WHERE pathname = ? ;", &select_path_by_pathname);
@@ -231,6 +240,7 @@ int prepare_statements() {
LOCAL_PREP_STMT("SELECT hashes.id FROM hashes ;", &select_hashes_all_ids);
LOCAL_PREP_STMT("SELECT blake2, sha256, sha512 FROM hashes WHERE id = ? ;", &select_hashes_by_id);
LOCAL_PREP_STMT("SELECT id FROM hashes WHERE blake2 = ? AND sha256 = ? AND sha512 = ? ;", &select_hashes_by_strings);
+ LOCAL_PREP_STMT("SELECT * FROM hashes;", &select_hashes_complete_table);
LOCAL_PREP_STMT("SELECT * FROM fileinfo WHERE id = ? ;", &select_fileinfo_by_id);
@@ -247,10 +257,9 @@ int prepare_statements() {
LOCAL_PREP_STMT("SELECT paths.pathname, filenames.name, hashes.blake2, hashes.sha256, hashes.sha512, fileinfo.size, fileinfo.last_seen, fileinfo.stat_struct FROM fileinfo INNER JOIN paths ON fileinfo.p_id = paths.id INNER JOIN filenames ON fileinfo.fn_id = filenames.id INNER JOIN hashes ON fileinfo.h_id = hashes.id WHERE fileinfo.id = ? ;", &select_fileinfo_by_id_resolved);
LOCAL_PREP_STMT("SELECT paths.pathname, filenames.name, hashes.blake2, hashes.sha256, hashes.sha512, fileinfo.size, fileinfo.last_seen, fileinfo.stat_struct FROM fileinfo INNER JOIN paths ON fileinfo.p_id = paths.id INNER JOIN filenames ON fileinfo.fn_id = filenames.id INNER JOIN hashes ON fileinfo.h_id = hashes.id ;", &select_fileinfo_complete_table_resolved);
LOCAL_PREP_STMT("SELECT paths.pathname, filenames.name, hashes.blake2, hashes.sha256, hashes.sha512, fileinfo.size, fileinfo.last_seen, fileinfo.stat_struct FROM fileinfo INNER JOIN paths ON fileinfo.p_id = paths.id INNER JOIN filenames ON fileinfo.fn_id = filenames.id INNER JOIN hashes ON fileinfo.h_id = hashes.id WHERE fileinfo.h_id = ?;", &select_fileinfo_by_hash_id_resolved);
+ LOCAL_PREP_STMT("SELECT paths.pathname, filenames.name, hashes.blake2, hashes.sha256, hashes.sha512, fileinfo.size, fileinfo.last_seen, fileinfo.stat_struct FROM fileinfo INNER JOIN paths ON fileinfo.p_id = paths.id INNER JOIN filenames ON fileinfo.fn_id = filenames.id INNER JOIN hashes ON fileinfo.h_id = hashes.id WHERE fileinfo.fn_id = ?;", &select_fileinfo_by_filename_id_resolved);
LOCAL_PREP_STMT("SELECT p_id, fn_id, h_id, size, last_seen, stat_struct FROM fileinfo ;", &select_fileinfo_complete_table);
- LOCAL_PREP_STMT("SELECT * FROM filenames;", &select_filename_complete_table);
- LOCAL_PREP_STMT("SELECT * FROM hashes;", &select_hashes_complete_table);
/* INSERT */
LOCAL_PREP_STMT("INSERT INTO filenames (name) VALUES (?);", &insert_filename);
@@ -272,6 +281,7 @@ int prepare_statements() {
LOCAL_PREP_STMT("SELECT COUNT(fileinfo.h_id) FROM fileinfo WHERE fileinfo.h_id = ?;", &count_fileinfo_by_hash_id);
LOCAL_PREP_STMT("SELECT COUNT(fileinfo.fn_id) FROM fileinfo WHERE fileinfo.fn_id = ?;", &count_fileinfo_by_filename);
LOCAL_PREP_STMT("SELECT COUNT(hashes.id) FROM hashes ;", &count_hashes);
+ LOCAL_PREP_STMT("SELECT COUNT(filenames.id) FROM filenames ;", &count_filenames);
#undef LOCAL_PREP_STMT
return 0;
@@ -990,21 +1000,21 @@ inline int64_t call_count_query(sqlite3_stmt *st) {
}
/**
- * Get an array containing all ids from table hashes with the first field
+ * Get an array containing all ids of the queried table, with the first field
* containing the complete length of the array including this field.
*
* @return NULL on failure
* an array on the heap which must be freed by the caller.
*/
-int64_t *dbi_select_hashes_all_ids() {
+inline int64_t *call_select_all_ids(sqlite3_stmt *all_ids, sqlite3_stmt *count_query) {
int64_t *result = NULL;
int64_t rows = 0, id = 1, pos = 1;
int strc = 0;
- sqlite3_stmt *st = select_hashes_all_ids;
+ sqlite3_stmt *st = all_ids;
DBCONN_CHECK(NULL);
- rows = call_count_query(count_hashes);
+ rows = call_count_query(count_query);
if (rows < 0) { return NULL; }
@@ -1048,6 +1058,14 @@ int64_t *dbi_select_hashes_all_ids() {
return result;
}
+/**
+ * Collect the ids of all rows stored in table filenames.
+ *
+ * Thin wrapper that hands the prepared statements for the filenames table
+ * (id selection and row count) to call_select_all_ids().
+ *
+ * @return the result of call_select_all_ids(): NULL on failure, otherwise
+ *         an array on the heap which must be freed by the caller.
+ */
+int64_t *dbi_select_filenames_all_ids() {
+    int64_t *ids = call_select_all_ids(select_filename_all_ids, count_filenames);
+
+    return ids;
+}
+
+
+/**
+ * Collect the ids of all rows stored in table hashes.
+ *
+ * Thin wrapper that hands the prepared statements for the hashes table
+ * (id selection and row count) to call_select_all_ids().
+ *
+ * @return the result of call_select_all_ids(): NULL on failure, otherwise
+ *         an array on the heap which must be freed by the caller.
+ */
+int64_t *dbi_select_hashes_all_ids() {
+    int64_t *ids = call_select_all_ids(select_hashes_all_ids, count_hashes);
+
+    return ids;
+}
/**
* Iterate over the stored hashes, for those associated with more than
@@ -1161,7 +1179,107 @@ int dbi_print_identical_hashes(FILE *out) {
}
-int dbi_print_identical_filenames(FILE *out);
+/**
+ * Print all files whose filename is referenced by more than one fileinfo
+ * entry.
+ *
+ * Each matching file is written as one line of the form
+ *   pathname/name;blake2;sha256;sha512;size
+ *
+ * @param out stream to write to, stdout is used if this is NULL
+ * @return 0 on success
+ *         -1 on failure (presumably also when DBCONN_CHECK finds no usable
+ *         connection -- the macro body decides that)
+ */
+int dbi_print_identical_filenames(FILE *out) {
+    int rc = 0;
+    int64_t fnid, count, size, i;
+    int64_t *fnid_array;
+    int strc_fi = 0, strc_count = 0;
+    FILE *fd = out;
+    const unsigned char *path, *name, *blake2, *sha256, *sha512;
+    sqlite3_stmt *stfi = select_fileinfo_by_filename_id_resolved,
+                 *stcount = count_fileinfo_by_filename;
+
+    DBCONN_CHECK(-1);
+
+    if (fd == NULL) { fd = stdout; }
+
+    /* Segmented approach: fetch all filename ids first, then iterate over
+     * the array, count the fileinfo entries using each id and only query
+     * the resolved fileinfo rows when there is more than one association.
+     */
+    fnid_array = dbi_select_filenames_all_ids();
+    if (fnid_array == NULL) {
+        return -1;
+    }
+
+    /* fnid_array[0] holds the array length including itself, the ids start
+     * at index 1. */
+    for (i = 1; i < fnid_array[0]; i++) {
+        /* prevent human errors */
+        fnid = fnid_array[i];
+
+        /* Reset first, a statement that is still mid-step must not be
+         * touched otherwise. */
+        sqlite3_reset(stcount);
+        sqlite3_clear_bindings(stcount);
+
+        if (sqlite3_bind_int64(stcount, 1, fnid) != SQLITE_OK) {
+            LOGERR("ERROR: Failed to bind filenames.id to prepared statement for count: %s\n", sqlite3_errmsg(dbconn));
+            rc = -1;
+            break;
+        }
+
+        strc_count = sqlite3_step(stcount);
+        if (strc_count == SQLITE_DONE) { /* Not found */
+            continue;
+        } else if (strc_count != SQLITE_ROW) {
+            LOGERR("ERROR: Failed step: %s\n", sqlite3_errmsg(dbconn));
+            rc = -1;
+            break;
+        }
+
+        count = (int64_t) sqlite3_column_int64(stcount, 0);
+        /* The count is copied out, release the statement before the next
+         * query gets stepped. */
+        sqlite3_reset(stcount);
+        /* int64_t is not necessarily long, cast for a portable format */
+        DBGTRC("DEBUG: count results for filename id %lld: %lld\n", (long long) fnid, (long long) count);
+
+        if (count <= 1) {
+            continue;
+        }
+
+        sqlite3_reset(stfi);
+        sqlite3_clear_bindings(stfi);
+
+        if (sqlite3_bind_int64(stfi, 1, fnid) != SQLITE_OK) {
+            LOGERR("ERROR: Failed to bind filenames.id to prepared statement for fileinfo: %s\n", sqlite3_errmsg(dbconn));
+            rc = -1;
+            break;
+        }
+
+        while ((strc_fi = sqlite3_step(stfi)) == SQLITE_ROW) {
+            /* sqlite3_column_text() yields NULL for SQL NULL values and on
+             * allocation failure, passing that to %s is undefined. */
+            path   = sqlite3_column_text(stfi, 0); /* paths.pathname */
+            name   = sqlite3_column_text(stfi, 1); /* filenames.name */
+            blake2 = sqlite3_column_text(stfi, 2); /* hashes.blake2 */
+            sha256 = sqlite3_column_text(stfi, 3); /* hashes.sha256 */
+            sha512 = sqlite3_column_text(stfi, 4); /* hashes.sha512 */
+            size   = (int64_t) sqlite3_column_int64(stfi, 5); /* fileinfo.size */
+
+            fprintf(fd, "%s/%s;%s;%s;%s;%lld\n",
+                    (path   != NULL) ? (const char *) path   : "",
+                    (name   != NULL) ? (const char *) name   : "",
+                    (blake2 != NULL) ? (const char *) blake2 : "",
+                    (sha256 != NULL) ? (const char *) sha256 : "",
+                    (sha512 != NULL) ? (const char *) sha512 : "",
+                    (long long) size);
+        }
+
+        if (strc_fi != SQLITE_DONE) {
+            LOGERR("ERROR: Failed step to get fileinfo content: %s\n", sqlite3_errmsg(dbconn));
+            rc = -1;
+            break;
+        }
+        DBGTRC("DEBUG: finished for filename id %lld\n", (long long) fnid);
+
+        sqlite3_reset(stfi);
+        sqlite3_clear_bindings(stfi);
+    }
+
+    /* Single exit: success and every error path release the array and leave
+     * both shared statements reset for the next user. */
+    free(fnid_array);
+
+    sqlite3_reset(stfi);
+    sqlite3_clear_bindings(stfi);
+    sqlite3_reset(stcount);
+    sqlite3_clear_bindings(stcount);
+
+    return rc;
+}