diff options
| author | Thorsten Töpper <atsutane@freethoughts.de> | 2026-02-18 21:51:53 +0100 |
|---|---|---|
| committer | Thorsten Töpper <atsutane@freethoughts.de> | 2026-02-18 21:51:53 +0100 |
| commit | d513977a3566b14d9357906615d045d71741537f (patch) | |
| tree | 3e707d2de9da71d98650fa8bb1b92ed11ab724ba /src | |
| parent | eed2d1323441861f2d41f0ecc0a72fcc9190fa5f (diff) | |
| download | duplicate_finder-d513977a3566b14d9357906615d045d71741537f.tar.gz duplicate_finder-d513977a3566b14d9357906615d045d71741537f.tar.bz2 | |
squash initial implementation
Diffstat (limited to 'src')
| -rw-r--r-- | src/database_interaction.c | 885 | ||||
| -rw-r--r-- | src/directory_scanner.c | 211 | ||||
| -rw-r--r-- | src/duplicate_finder.c | 49 | ||||
| -rw-r--r-- | src/file_processor.c | 110 | ||||
| -rw-r--r-- | src/kv_manager.c | 347 | ||||
| -rw-r--r-- | src/options.c | 118 |
6 files changed, 1715 insertions, 5 deletions
diff --git a/src/database_interaction.c b/src/database_interaction.c new file mode 100644 index 0000000..e31d197 --- /dev/null +++ b/src/database_interaction.c @@ -0,0 +1,885 @@ +/* SPDX-License-Identifier: Apache-2.0 */ + +/** + * Copyright 2026 Thorsten Töpper + * + * The database contains those tables: + * - filenames + * -> id INTEGER PRIMARY KEY + * -> name TEXT + * - paths + * -> id INTEGER PRIMARY KEY + * -> pathname TEXT + * - fileinfo + * -> id INTEGER PRIMARY KEY + * -> p_id INTEGER + * -> fn_id INTEGER + * -> h_id INTEGER + * -> size INTEGER + * -> last_seen INTEGER + * -> stat_bin BLOB + * - hashes + * -> id INTEGER PRIMARY KEY + * -> blake2 TEXT + * -> sha256 TEXT + * -> sha512 TEXT + * + * @file database_interaction.c + * + * vim:ts=4:sw=4:expandtab + */ +#include <stdio.h> +#include <stdlib.h> +#include <string.h> +#include <unistd.h> +#include <stdbool.h> +#include <stdint.h> +#include <errno.h> +#include <time.h> + +#include <sqlite3.h> + +#include "options.h" +#include "database_interaction.h" +#include "trace_macros.h" +#include "file_processor.h" + + +/*=========== DEFINES, CONSTANTS AND TYPES ===========*/ + +/*=========== GLOBAL VARIABLES ===========*/ +sqlite3 *dbconn = NULL; + +/* The statements will be wrapped via a function dbi_STATEMENTNAME() for the outside */ +sqlite3_stmt *select_filename_by_id, + *select_filename_by_name, + *select_path_by_id, + *select_path_by_pathname, + *select_hashes_by_id, + *select_hashes_by_strings, + *select_fileinfo_by_id, + *select_fileinfo_by_id_resolved, + *select_fileinfo_by_path_id, + *select_fileinfo_by_filename_id, + *select_fileinfo_by_path_filename_ids, + *select_fileinfo_by_hash_path_filename_ids, + *select_fileinfo_by_hash_id; + +sqlite3_stmt *insert_filename, + *insert_pathname, + *insert_hashes, + *insert_fileinfo; + +sqlite3_stmt *update_fileinfo_last_seen, + *update_fileinfo_complete; + +sqlite3_stmt *delete_fileinfo_by_id; + +/*=========== FUNCTIONS ===========*/ +void create_tables(); +int 
prepare_statements(); +char *select_string_by_int(sqlite3_stmt *st, int64_t id); + +/* Writing this block way too often */ +#define DBCONN_CHECK(x) \ + if (dbconn==NULL){ LOGERR("ERROR: No database connection.\n");\ + return x; } + + + +bool dbi_open(char *filename) { + if (filename == NULL || filename[0] == '\0') { + LOGERR("ERROR: No valid filename given.\n"); + return false; + } + + if (dbconn != NULL) { + LOGERR("ERROR: There's already an open database\n"); + return false; + } + + if (sqlite3_open(filename, &dbconn) != SQLITE_OK) { + LOGERR("ERROR: Failed to open database: %s\n", + sqlite3_errmsg(dbconn)); + sqlite3_close(dbconn); + dbconn = NULL; + return false; + } + + create_tables(); + if (prepare_statements() != 0) { + return false; + } + + return true; +} + +void dbi_close() { + /* TODO: sqlite3_finalize for all prepared statements */ +#define LOCAL_FINALIZE(x) { sqlite3_finalize(x); x=NULL; } + + DBCONN_CHECK(); + + LOCAL_FINALIZE(select_filename_by_id); + LOCAL_FINALIZE(select_filename_by_name); + LOCAL_FINALIZE(select_path_by_id); + LOCAL_FINALIZE(select_path_by_pathname); + LOCAL_FINALIZE(select_hashes_by_id); + LOCAL_FINALIZE(select_hashes_by_strings); + LOCAL_FINALIZE(select_fileinfo_by_id); + LOCAL_FINALIZE(select_fileinfo_by_id_resolved); + LOCAL_FINALIZE(select_fileinfo_by_path_id); + LOCAL_FINALIZE(select_fileinfo_by_filename_id); + LOCAL_FINALIZE(select_fileinfo_by_path_filename_ids); + LOCAL_FINALIZE(select_fileinfo_by_hash_id); + + LOCAL_FINALIZE(insert_filename); + LOCAL_FINALIZE(insert_pathname); + LOCAL_FINALIZE(insert_hashes); + LOCAL_FINALIZE(insert_fileinfo); + + LOCAL_FINALIZE(update_fileinfo_last_seen); + LOCAL_FINALIZE(update_fileinfo_complete); + + LOCAL_FINALIZE(delete_fileinfo_by_id); + +#undef LOCAL_FINALIZE + + sqlite3_close(dbconn); + dbconn = NULL; +} + + +/** + * Create the later used tables if they don't exist yet + */ +inline void create_tables() { + char *err = NULL; + + sqlite3_exec(dbconn, "CREATE TABLE IF NOT EXISTS 
filenames ( id INTEGER PRIMARY KEY AUTOINCREMENT, name TEXT UNIQUE);", NULL, NULL, &err); + if (err != NULL) { + LOGERR("ERROR: Creation of table filenames failed: %s\n", err); + sqlite3_free(err); + err = NULL; + } + + sqlite3_exec(dbconn, "CREATE TABLE IF NOT EXISTS paths ( id INTEGER PRIMARY KEY AUTOINCREMENT, pathname TEXT UNIQUE);", NULL, NULL, &err); + if (err != NULL) { + LOGERR("ERROR: Creation of table pathss failed: %s\n", err); + sqlite3_free(err); + err = NULL; + } + + /* no UNIQUE here, as even for the rare case of a hash collission in a single algorithm, all three won't collide at the same time. */ + sqlite3_exec(dbconn, "CREATE TABLE IF NOT EXISTS hashes ( id INTEGER PRIMARY KEY AUTOINCREMENT, blake2 TEXT, sha256 TEXT, sha512 TEXT );", NULL, NULL, &err); + if (err != NULL) { + LOGERR("ERROR: Creation of table hashes failed: %s\n", err); + sqlite3_free(err); + err = NULL; + } + sqlite3_exec(dbconn, "CREATE TABLE IF NOT EXISTS fileinfo ( id INTEGER PRIMARY KEY, p_id INTEGER, " + "fn_id INTEGER, h_id INTEGER, size INTEGER, last_seen INTEGER, stat_struct BLOB, " + "FOREIGN KEY(p_id) REFERENCES paths(id), FOREIGN KEY(fn_id) REFERENCES filenames(id), " + "FOREIGN KEY(h_id) REFERENCES hashes(id));", NULL, NULL, &err); + if (err != NULL) { + LOGERR("ERROR: Creation of table fileinfo failed: %s\n", err); + sqlite3_free(err); + err = NULL; + } +} + +int prepare_statements() { + int counter = 0; + /* Error handling in KISS. */ +#define LOCAL_PREP_STMT(q, s) { counter++; \ + if ((sqlite3_prepare_v2(dbconn, q, -1, s, NULL)) != SQLITE_OK) { \ + LOGERR("ERROR: Failed to prepare statement %d '%s': %s\n", \ + counter, q, sqlite3_errmsg(dbconn)); return -1; } \ + } + + /* SELECT */ + LOCAL_PREP_STMT("SELECT name FROM filenames WHERE id = ? ;", &select_filename_by_id); + LOCAL_PREP_STMT("SELECT id FROM filenames WHERE name = ? ;", &select_filename_by_name); + + LOCAL_PREP_STMT("SELECT pathname FROM paths WHERE id = ? 
;", &select_path_by_id); + LOCAL_PREP_STMT("SELECT id FROM paths WHERE pathname = ? ;", &select_path_by_pathname); + + LOCAL_PREP_STMT("SELECT blake2, sha256, sha512 FROM hashes WHERE id = ? ;", &select_hashes_by_id); + LOCAL_PREP_STMT("SELECT id FROM hashes WHERE blake2 = ? AND sha256 = ? AND sha512 = ? ;", &select_hashes_by_strings); + + + LOCAL_PREP_STMT("SELECT * FROM fileinfo WHERE id = ? ;", &select_fileinfo_by_id); + LOCAL_PREP_STMT("SELECT * FROM fileinfo WHERE p_id = ? ;", &select_fileinfo_by_path_id); + LOCAL_PREP_STMT("SELECT * FROM fileinfo WHERE fn_id = ? ;", &select_fileinfo_by_filename_id); + LOCAL_PREP_STMT("SELECT * FROM fileinfo WHERE p_id = ? AND fn_id = ? ;", &select_fileinfo_by_path_filename_ids); + LOCAL_PREP_STMT("SELECT * FROM fileinfo WHERE h_id = ? AND p_id = ? AND fn_id = ? ;", &select_fileinfo_by_hash_path_filename_ids); + LOCAL_PREP_STMT("SELECT * FROM fileinfo WHERE h_id = ? ;", &select_fileinfo_by_hash_id); + + /* TODO: so far the only query with JOINs or masking it in another way? Ignore this frontend query for now.*/ + select_fileinfo_by_id_resolved = NULL; + + /* INSERT */ + LOCAL_PREP_STMT("INSERT INTO filenames (name) VALUES (?);", &insert_filename); + LOCAL_PREP_STMT("INSERT INTO paths (pathname) VALUES (?);", &insert_pathname); + LOCAL_PREP_STMT("INSERT INTO hashes (blake2, sha256, sha512) VALUES (@blake2, @sha256, @sha512);", &insert_hashes); + LOCAL_PREP_STMT("INSERT INTO fileinfo (p_id, fn_id, h_id, size, last_seen, stat_struct) " + "VALUES (?, ?, ?, ?, ?, ?);", &insert_fileinfo); + + /* UPDATE */ + LOCAL_PREP_STMT("UPDATE fileinfo SET last_seen = @time WHERE id = @id ;", &update_fileinfo_last_seen); + LOCAL_PREP_STMT("UPDATE fileinfo SET p_id = @pid , fn_id = @fnid , h_id = @hid , " + "size = @sz , last_seen = @ls, stat_struct = @stat WHERE id = @id ;", &update_fileinfo_complete); + + /* DELETE */ + LOCAL_PREP_STMT("DELETE FROM fileinfo WHERE id = ? 
;", &delete_fileinfo_by_id); + +#undef LOCAL_PREP_STMT + return 0; +} + +/** + * To be wrapped for simple SELECT text ... WHERE PK = id; statements. + * @param st A prepared statement + * @param id A 64 bit integer used as primary key in the query. + * @return NULL in case of error + * copy of the database content + */ +char *select_string_by_int(sqlite3_stmt *st, int64_t id) { + char *result = NULL; + int strc = 0; + const char *text; + + DBCONN_CHECK(NULL); + + if (st == NULL) { + LOGERR("ERROR: No prepared statement.\n"); + return NULL; + } + + if (id < 1) { + //LOGERR("ERROR: Invalid id %" PRId64 "\n", id); + LOGERR("ERROR: Invalid id %ld\n", id); + return NULL; + } + + if (sqlite3_bind_int64(st, 1, id) != SQLITE_OK) { + // LOGERR("ERROR: Failed to bind id %" PRId64 " to prepared statement: %s\n", sqlite3_errmsg(dbconn)); + LOGERR("ERROR: Failed to bind id %ld to prepared statement: %s\n", id, sqlite3_errmsg(dbconn)); + return NULL; + } + + strc = sqlite3_step(st); + /* Dont' forget: the sqlite3_reset() call must be executed! */ + if (strc == SQLITE_ROW) { + text = (const char *)sqlite3_column_text(st, 0); + if ((result = calloc((strlen(text)+1), sizeof(char))) == NULL) { + LOGERR("ERROR: Failed to allocate memory for copy of query result.\n"); + sqlite3_reset(st); + return NULL; + } + memcpy(result, text, strlen(text)); + } else if (strc != SQLITE_DONE) { + LOGERR("ERROR: Failed step: %s\n", sqlite3_errmsg(dbconn)); + } + + sqlite3_clear_bindings(st); + sqlite3_reset(st); + + return result; +} + +/** + * + * @return 0 if ok, <0 in case of error + */ +int insert_text(sqlite3_stmt *st, int64_t (*check_function)(const char*), const char *text) { + int strc = 0; + + DBCONN_CHECK(-1); + + if (st == NULL) { + LOGERR("ERROR: No prepared statement.\n"); + return -1; + } + + if (text == NULL) { + LOGERR("ERROR: No content to insert.\n"); + return -1; + } + + /* CHECK WHETHER THE ENTRY ALREADY EXISTS! 
*/ + if ((check_function != NULL) && (*check_function)(text) > 0) { + return 0; + } + + if (sqlite3_bind_text(st, 1, text, -1, SQLITE_TRANSIENT) != SQLITE_OK) { + LOGERR("ERROR: Failed to bind text '%s' to statement: %s\n", text, sqlite3_errmsg(dbconn)); + return -1; + } + + strc = sqlite3_step(st); + if (strc != SQLITE_DONE) { + LOGERR("ERROR: Failed to insert text '%s' into db: %s\n", text, sqlite3_errmsg(dbconn)); + } + + sqlite3_clear_bindings(st); + sqlite3_reset(st); + + return (strc == SQLITE_DONE) ? 0 : -1; +} + +/** + * To be wrapped for simple SELECT text ... WHERE COL = string; statements COL being UNIQUE. + * @param st A prepared statement + * @param id a string bound to the WHERE in the statement + * @return < -1 in case of error + * 0 if not found + * >0 the id in the database + */ +int64_t select_int_by_string(sqlite3_stmt *st, const char *s) { + int64_t result = -1; + int strc = 0; + + DBCONN_CHECK(-2); + + if (st == NULL) { + LOGERR("ERROR: No prepared statement.\n"); + return -2; + } + + if (s == NULL || strlen(s)==0) { + LOGERR("ERROR: Invalid string %s\n", s); + return -2; + } + + + sqlite3_clear_bindings(st); + sqlite3_reset(st); + + if (sqlite3_bind_text(st, 1, s, -1, SQLITE_TRANSIENT) != SQLITE_OK) { + LOGERR("ERROR: Failed to bind string %s to prepared statement: %s\n", s, sqlite3_errmsg(dbconn)); + return -2; + } + + strc = sqlite3_step(st); + /* Dont' forget: the sqlite3_reset() call must be executed! 
*/ + if (strc == SQLITE_ROW) { + result = (int64_t) sqlite3_column_int64(st, 0); + } else if (strc != SQLITE_DONE) { + LOGERR("ERROR: Failed step: %s\n", sqlite3_errmsg(dbconn)); + result = -2; + } else { /* SQLITE_DONE => EMPTY */ + DBGTRC("DEBUG: Combination not found in db\n"); + result = 0; + } + + sqlite3_clear_bindings(st); + sqlite3_reset(st); + + return result; +} + + + +char *dbi_select_filename_by_id(int64_t id) { + return select_string_by_int(select_filename_by_id, id); +} + +char *dbi_select_path_by_id(int64_t id) { + return select_string_by_int(select_path_by_id, id); +} + +int64_t dbi_select_filename_by_name(const char *name) { + return select_int_by_string(select_filename_by_name, name); +} + +int64_t dbi_select_path_by_pathname(const char *pathname) { + return select_int_by_string(select_path_by_pathname, pathname); +} + + +int dbi_insert_filename(const char *filename) { + return insert_text(insert_filename, dbi_select_filename_by_name, filename); +} + +int dbi_insert_pathname(const char *path) { + return insert_text(insert_pathname, dbi_select_path_by_pathname, path); +} + +int64_t dbi_select_hashes_by_strings(const char *blake2, const char *sha256, const char *sha512) { + int64_t result = 0; + int strc = 0; + sqlite3_stmt *st = select_hashes_by_strings; + + DBCONN_CHECK(-2); + + if (blake2 == NULL || sha256 == NULL || sha512 == NULL || + strlen(blake2)==0 || strlen(sha256)==0 || strlen(sha512)==0) { + LOGERR("ERROR: Invalid argument: blake2=%s sha256=%s sha512=%s\n", + blake2, sha256, sha512); + return -2; + } + + /* Avoid conflict with earlier calls */ + sqlite3_clear_bindings(st); + sqlite3_reset(st); + + if (sqlite3_bind_text(st, sqlite3_bind_parameter_index(st, "blake2"), blake2, -1, SQLITE_TRANSIENT) != SQLITE_OK) { + LOGERR("ERROR: Failed to bind field blake2 to prepared statement: %s\n", sqlite3_errmsg(dbconn)); + return -2; + } + + if (sqlite3_bind_text(st, sqlite3_bind_parameter_index(st, "sha256"), sha256, -1, SQLITE_TRANSIENT) != 
SQLITE_OK) { + LOGERR("ERROR: Failed to bind field sha256 to prepared statement: %s\n", sqlite3_errmsg(dbconn)); + return -2; + } + + if (sqlite3_bind_text(st, sqlite3_bind_parameter_index(st, "sha512"), sha512, -1, SQLITE_TRANSIENT) != SQLITE_OK) { + LOGERR("ERROR: Failed to bind field sha512 to prepared statement: %s\n", sqlite3_errmsg(dbconn)); + return -2; + } + + strc = sqlite3_step(st); + /* Dont' forget: the sqlite3_reset() call must be executed! */ + if (strc == SQLITE_ROW) { + result = (int64_t) sqlite3_column_int64(st, 0); + } else if (strc != SQLITE_DONE) { + LOGERR("ERROR: Failed step: %s\n", sqlite3_errmsg(dbconn)); + result = -2; + } else { /* SQLITE_DONE => EMPTY */ + DBGTRC("DEBUG: Combination not found in db\n"); + result = 0; + } + + sqlite3_clear_bindings(st); + sqlite3_reset(st); + + return result; +} + +int dbi_insert_hashes(const char *blake2, const char *sha256, const char *sha512) { + int rc = 0; + int64_t strc = 0; + sqlite3_stmt *st = insert_hashes; + + DBCONN_CHECK(-2); + + if (blake2 == NULL || sha256 == NULL || sha512 == NULL || + strlen(blake2)==0 || strlen(sha256)==0 || strlen(sha512)==0) { + LOGERR("ERROR: Invalid argument: blake2=%s sha256=%s sha512=%s\n", + blake2, sha256, sha512); + return -2; + } + + if (dbi_select_hashes_by_strings(blake2, sha256, sha512) > 0) { + return 0; + } + + /* Avoid conflict with earlier calls */ + sqlite3_clear_bindings(st); + sqlite3_reset(st); + + if (sqlite3_bind_text(st, sqlite3_bind_parameter_index(st, "blake2"), blake2, -1, SQLITE_TRANSIENT) != SQLITE_OK) { + LOGERR("ERROR: Failed to bind field blake2 to prepared statement: %s\n", sqlite3_errmsg(dbconn)); + return -2; + } + + if (sqlite3_bind_text(st, sqlite3_bind_parameter_index(st, "sha256"), sha256, -1, SQLITE_TRANSIENT) != SQLITE_OK) { + LOGERR("ERROR: Failed to bind field sha256 to prepared statement: %s\n", sqlite3_errmsg(dbconn)); + return -2; + } + + if (sqlite3_bind_text(st, sqlite3_bind_parameter_index(st, "sha512"), sha512, -1, 
SQLITE_TRANSIENT) != SQLITE_OK) { + LOGERR("ERROR: Failed to bind field sha512 to prepared statement: %s\n", sqlite3_errmsg(dbconn)); + return -2; + } + + strc = sqlite3_step(st); + if (strc != SQLITE_DONE) { + LOGERR("ERROR: Failed to insert hashes (blake2=%s, sha256=%s, sha512=%s) into db: %s\n", + blake2, sha256, sha512, sqlite3_errmsg(dbconn)); + rc = -2; + } else { + rc = 0; + } + + sqlite3_clear_bindings(st); + sqlite3_reset(st); + + return rc; +} + +struct df_hashstrings *dbi_select_hashes_by_id(int64_t id) { + struct df_hashstrings *result = NULL; + int strc = 0; + sqlite3_stmt *st = select_hashes_by_id; + const char *text; + + DBCONN_CHECK(NULL); + + if (id < 1) { + LOGERR("ERROR: invalid id %lld\n", (long long int)id); /* TODO: Macro resolve not ok */ + return NULL; + } + + if (sqlite3_bind_int64(st, 1, id) != SQLITE_OK) { + // LOGERR("ERROR: Failed to bind id %" PRId64 " to prepared statement: %s\n", sqlite3_errmsg(dbconn)); + LOGERR("ERROR: Failed to bind id %ld to prepared statement: %s\n", id, sqlite3_errmsg(dbconn)); + return NULL; + } + + strc = sqlite3_step(st); + /* Dont' forget: the sqlite3_reset() call must be executed! 
*/ + if (strc == SQLITE_ROW) { + if ((result = calloc(1, sizeof(struct df_hashstrings))) == NULL) { + LOGERR("ERROR: Failed to allocate memory for copy of query result.\n"); + sqlite3_reset(st); + return NULL; + } + text = (const char *)sqlite3_column_text(st, 0); + memcpy(result->blake2, text, strlen(text)); + text = (const char *)sqlite3_column_text(st, 1); + memcpy(result->sha256, text, strlen(text)); + text = (const char *)sqlite3_column_text(st, 2); + memcpy(result->sha512, text, strlen(text)); + } else if (strc != SQLITE_DONE) { + LOGERR("ERROR: Failed step: %s\n", sqlite3_errmsg(dbconn)); + } + + sqlite3_clear_bindings(st); + sqlite3_reset(st); + return result; +} + + +int64_t dbi_select_fileinfo_by_hash_path_filename_ids(int64_t hash_id, int64_t path_id, int64_t filename_id) { + int64_t result = 0; + int strc = 0; + sqlite3_stmt *st = select_fileinfo_by_hash_path_filename_ids; + + DBCONN_CHECK(-2); + + sqlite3_clear_bindings(st); + sqlite3_reset(st); + + if ( hash_id < 1 || path_id < 1 || filename_id < 1 ) { + LOGERR("ERROR: At least one invalid id: hashes %ld | path %ld | filename %ld\n", + hash_id, path_id, filename_id); + return -2; + } + + if (sqlite3_bind_int64(st, 1, hash_id) != SQLITE_OK) { + LOGERR("ERROR: Failed to bind hash_id %ld to prepared statement: %s\n", hash_id, sqlite3_errmsg(dbconn)); + return -2; + } + + if (sqlite3_bind_int64(st, 2, path_id) != SQLITE_OK) { + LOGERR("ERROR: Failed to bind path_id %ld to prepared statement: %s\n", path_id, sqlite3_errmsg(dbconn)); + return -2; + } + + if (sqlite3_bind_int64(st, 3, filename_id) != SQLITE_OK) { + LOGERR("ERROR: Failed to bind filename_id %ld to prepared statement: %s\n", filename_id, sqlite3_errmsg(dbconn)); + return -2; + } + + strc = sqlite3_step(st); + /* Dont' forget: the sqlite3_reset() call must be executed! 
*/ + if (strc == SQLITE_ROW) { + result = (int64_t) sqlite3_column_int64(st, 0); + } else if (strc != SQLITE_DONE) { + LOGERR("ERROR: Failed step: %s\n", sqlite3_errmsg(dbconn)); + result = -2; + } else { /* SQLITE_DONE => EMPTY */ + DBGTRC("DEBUG: Combination not found in db\n"); + result = 0; + } + + sqlite3_clear_bindings(st); + sqlite3_reset(st); + + + return result; +} + +int64_t dbi_select_fileinfo_by_path_filename_ids(int64_t pname_id, int64_t fname_id) { + int64_t result = 0; + int strc = 0; + sqlite3_stmt *st = select_fileinfo_by_path_filename_ids; + + DBCONN_CHECK(-2); + + sqlite3_clear_bindings(st); + sqlite3_reset(st); + + if ( pname_id < 1 || fname_id < 1 ) { + LOGERR("ERROR: At least one invalid id: path %ld | filename %ld\n", + pname_id, fname_id); + return -2; + } + + if (sqlite3_bind_int64(st, 1, pname_id) != SQLITE_OK) { + LOGERR("ERROR: Failed to bind pname_id %ld to prepared statement: %s\n", pname_id, sqlite3_errmsg(dbconn)); + return -2; + } + + if (sqlite3_bind_int64(st, 2, fname_id) != SQLITE_OK) { + LOGERR("ERROR: Failed to bind fname_id %ld to prepared statement: %s\n", fname_id, sqlite3_errmsg(dbconn)); + return -2; + } + + strc = sqlite3_step(st); + /* Dont' forget: the sqlite3_reset() call must be executed! 
*/ + if (strc == SQLITE_ROW) { + result = (int64_t) sqlite3_column_int64(st, 0); + } else if (strc != SQLITE_DONE) { + LOGERR("ERROR: Failed step: %s\n", sqlite3_errmsg(dbconn)); + result = -2; + } else { /* SQLITE_DONE => EMPTY */ + DBGTRC("DEBUG: Combination not found in db\n"); + result = 0; + } + + sqlite3_clear_bindings(st); + sqlite3_reset(st); + + return result; +} + +int dbi_update_fileinfo_last_seen(int64_t id) { + int rc = -1, strc = 0; + time_t ts = time(NULL); + sqlite3_stmt *st = update_fileinfo_last_seen; + + DBCONN_CHECK( -2 ); + + if (id < 1) { + LOGERR("ERROR: Invalid id.\n"); + return -1; + } + + sqlite3_clear_bindings(st); + sqlite3_reset(st); + + if (sqlite3_bind_int64(st, 1, ts) != SQLITE_OK) { + LOGERR("ERROR: Failed to bind last_seen timestamp to prepared statement: %s\n", sqlite3_errmsg(dbconn)); + return -2; + } + if (sqlite3_bind_int64(st, 2, id) != SQLITE_OK) { + LOGERR("ERROR: Failed to bind last_seen timestamp to prepared statement: %s\n", sqlite3_errmsg(dbconn)); + return -2; + } + + strc = sqlite3_step(st); + if (strc != SQLITE_DONE) { + LOGERR("ERROR: Failed to update last_seen timestamp for entry %ld: %s\n", id, sqlite3_errmsg(dbconn)); + rc = -2; + } else { + rc = 0; + } + + sqlite3_clear_bindings(st); + sqlite3_reset(st); + + return rc; +} + +int update_fileinfo_function(struct df_fileinfo *fi, int64_t existing_id, + int64_t pname_id, int64_t fname_id, int64_t hashes_id) { + int rc = -1, strc = 0; + time_t ts = time(NULL); + sqlite3_stmt *st = update_fileinfo_complete; + + DBCONN_CHECK( -2 ); + + if (fi == NULL) { + LOGERR("ERROR: Invalid argument.\n"); + return -2; + } + + if (existing_id < 1) { + /* TODO: ALL possible situations need to be checked */ + if (fname_id < 1) { + fname_id = dbi_select_filename_by_name(fi->name); + } + + if (pname_id < 1) { + pname_id = dbi_select_path_by_pathname(fi->path); + } + } else { + LOGERR("ERROR: No entry given.\n"); + return -2; + } + + /* Possibly new hashes so always INSERT and use the 
return which was given */ + if (hashes_id < 1) { + if (dbi_insert_hashes(fi->hashes.blake2, fi->hashes.sha256, fi->hashes.sha512) < 0) { + LOGERR("ERROR: abort due to previous error.\n"); + return -2; + } + hashes_id = dbi_select_hashes_by_strings(fi->hashes.blake2, fi->hashes.sha256, fi->hashes.sha512); + } + + sqlite3_clear_bindings(st); + sqlite3_reset(st); + + if (sqlite3_bind_int64(st, 1, pname_id) != SQLITE_OK) { + LOGERR("ERROR: Failed to bind path_id to prepared statement: %s\n", sqlite3_errmsg(dbconn)); + return -2; + } + if (sqlite3_bind_int64(st, 2, fname_id) != SQLITE_OK) { + LOGERR("ERROR: Failed to bind filename_id to prepared statement: %s\n", sqlite3_errmsg(dbconn)); + return -2; + } + if (sqlite3_bind_int64(st, 3, hashes_id) != SQLITE_OK) { + LOGERR("ERROR: Failed to bind filename_id to prepared statement: %s\n", sqlite3_errmsg(dbconn)); + return -2; + } + if (sqlite3_bind_int64(st, 4, fi->statbuf.st_size) != SQLITE_OK) { + LOGERR("ERROR: Failed to bind size to prepared statement: %s\n", sqlite3_errmsg(dbconn)); + return -2; + } + if (sqlite3_bind_int64(st, 5, ts) != SQLITE_OK) { + LOGERR("ERROR: Failed to bind last_seen timestamp to prepared statement: %s\n", sqlite3_errmsg(dbconn)); + return -2; + } + if (sqlite3_bind_blob(st, 6, &(fi->statbuf), sizeof(struct stat), SQLITE_TRANSIENT) != SQLITE_OK) { + LOGERR("ERROR: Failed to bind last_seen timestamp to prepared statement: %s\n", sqlite3_errmsg(dbconn)); + return -2; + } + + strc = sqlite3_step(st); + if (strc != SQLITE_DONE) { + LOGERR("ERROR: Failed to completely update entry %ld: %s\n", existing_id, sqlite3_errmsg(dbconn)); + rc = -2; + } else { + rc = 0; + } + + sqlite3_clear_bindings(st); + sqlite3_reset(st); + + return rc; +} + +int dbi_update_fileinfo_complete(struct df_fileinfo *fi, int64_t existing_id) { + return update_fileinfo_function(fi, existing_id, -1, -1, -1); +} + +/** + * Wrapper function around several other inserts + */ +int dbi_insert_fileinfo(struct df_fileinfo *fi) { + int 
rc = 0, strc = 0; + int64_t fname_id, pname_id, hashes_id, existing_entry = 0; + time_t ts = 0; + sqlite3_stmt *st = insert_fileinfo; + + + DBCONN_CHECK(-2); + + if (fi == NULL) { + LOGERR("ERROR: No fileinfo given.\n"); + return -2; + } + + if (dbi_insert_filename(fi->name) < 0) { + LOGERR("ERROR: abort due to previous error.\n"); + return -2; + } + fname_id = dbi_select_filename_by_name(fi->name); + + if (dbi_insert_pathname(fi->path) < 0) { + LOGERR("ERROR: abort due to previous error.\n"); + return -2; + } + pname_id = dbi_select_path_by_pathname(fi->path); + + + /* TODO: Take some time and decide whether it shall stay like this or hand over the struct */ + if (dbi_insert_hashes(fi->hashes.blake2, fi->hashes.sha256, fi->hashes.sha512) < 0) { + LOGERR("ERROR: abort due to previous error.\n"); + return -2; + } + hashes_id = dbi_select_hashes_by_strings(fi->hashes.blake2, fi->hashes.sha256, fi->hashes.sha512); + + /* Any problems with the selects? */ + if (fname_id <0 || pname_id <0 || hashes_id <0) { + LOGERR("ERROR: abort due to previous error.\n"); + return -2; + } + + ts = time(NULL); + + /* TODO: There also belongs a query whether the fullpath already has an entry, if so and hash_id + * is different, an UPDATE and not an insert is required. + */ + existing_entry = dbi_select_fileinfo_by_hash_path_filename_ids(hashes_id, pname_id, fname_id); + if (existing_entry > 0) { + return dbi_update_fileinfo_last_seen(existing_entry); + } + + /* fullpath entry exists, but the hashes mismatch. 
*/ + existing_entry = dbi_select_fileinfo_by_path_filename_ids(pname_id, fname_id); + if (existing_entry > 0) { + return update_fileinfo_function(fi, existing_entry, pname_id, fname_id, hashes_id); + } + + sqlite3_clear_bindings(st); + sqlite3_reset(st); + + if (sqlite3_bind_int64(st, 1, pname_id) != SQLITE_OK) { + LOGERR("ERROR: Failed to bind path_id to prepared statement: %s\n", sqlite3_errmsg(dbconn)); + return -2; + } + if (sqlite3_bind_int64(st, 2, fname_id) != SQLITE_OK) { + LOGERR("ERROR: Failed to bind filename_id to prepared statement: %s\n", sqlite3_errmsg(dbconn)); + return -2; + } + if (sqlite3_bind_int64(st, 3, hashes_id) != SQLITE_OK) { + LOGERR("ERROR: Failed to bind filename_id to prepared statement: %s\n", sqlite3_errmsg(dbconn)); + return -2; + } + if (sqlite3_bind_int64(st, 4, fi->statbuf.st_size) != SQLITE_OK) { + LOGERR("ERROR: Failed to bind size to prepared statement: %s\n", sqlite3_errmsg(dbconn)); + return -2; + } + if (sqlite3_bind_int64(st, 5, ts) != SQLITE_OK) { + LOGERR("ERROR: Failed to bind last_seen timestamp to prepared statement: %s\n", sqlite3_errmsg(dbconn)); + return -2; + } + if (sqlite3_bind_blob(st, 6, &(fi->statbuf), sizeof(struct stat), SQLITE_TRANSIENT) != SQLITE_OK) { + LOGERR("ERROR: Failed to bind last_seen timestamp to prepared statement: %s\n", sqlite3_errmsg(dbconn)); + return -2; + } + + + strc = sqlite3_step(st); + if (strc != SQLITE_DONE) { + LOGERR("ERROR: Failed to insert fileinfo for %s/%s into db: %s\n", fi->path, fi->name, sqlite3_errmsg(dbconn)); + rc = -2; + } else { + rc = 0; + } + + + sqlite3_clear_bindings(st); + sqlite3_reset(st); + + return rc; +}; + +#if 0 + *select_fileinfo_by_id, + *select_fileinfo_by_id_resolved, + *select_fileinfo_by_path_id, + *select_fileinfo_by_filename_id, + *select_fileinfo_by_path_filename_ids, + *select_fileinfo_by_hash_id; + + +sqlite3_stmt *, + +sqlite3_stmt *delete_fileinfo_by_id; +#endif + diff --git a/src/directory_scanner.c b/src/directory_scanner.c new file mode 
100644 index 0000000..27627d5 --- /dev/null +++ b/src/directory_scanner.c @@ -0,0 +1,211 @@ +/* SPDX-License-Identifier: Apache-2.0 */ + +/** + * Copyright 2026 Thorsten Töpper + * + * @file directory_scanner.c + * + * vim:ts=4:sw=4:expandtab + */ +#include <stdio.h> +#include <stdlib.h> +#include <string.h> +#include <unistd.h> +#include <stdbool.h> +#include <stdint.h> +#include <errno.h> +#include <sys/stat.h> +#include <dirent.h> + +#include "options.h" +#include "directory_scanner.h" +#include "kv_manager.h" +#include "trace_macros.h" + + +/*=========== DEFINES, CONSTANTS AND TYPES ===========*/ + +struct path_stack { + struct path_stack *next; + char *fullpath; +}; + +/*=========== GLOBAL VARIABLES ===========*/ +struct path_stack *pstack = NULL; + + +/*=========== FUNCTIONS ===========*/ + +int pstack_push(char *s); +char *pstack_pop(); + + +inline int pstack_push(char *s) { + struct path_stack *node; + if (s == NULL || s[0] == '\0') { + LOGERR("ERROR: Empty path\n"); + return -1; + } + + DBGTRC("DEBUG: push %s to pstack\n", s); + + if ((node = calloc(1, sizeof(struct path_stack))) == NULL) { + LOGERR("ERROR: Failed to allocate 16bytes...\n"); + return -1; + } + + node->next = pstack; + node->fullpath = s; + pstack = node; + return 0; +} + + +inline char *pstack_pop() { + struct path_stack *node = pstack; + char *s; + + if (pstack == NULL) return NULL; + + DBGTRC("DEBUG: pop %s from pstack\n", node->fullpath); + + s = node->fullpath; + pstack = node->next; + free(node); + + return s; +} + + +/** + * Traverse the directory tree from the given starting point and add everything to + * the gdbm storage. + * + * @param starting_point The directory where the travel begins. If NULL or empty the current + * path "." is used. 
/**
 * Traverse the directory tree from the given starting point and hand every
 * directory to process_directory(), which pushes the subdirectories it finds
 * on the path stack.
 *
 * @param starting_point The directory where the travel begins. If NULL or empty the current
 *                       path "." is used.
 * @return 0 on success
 *         <0 on failure; paths still on the stack stay there
 */
int traverse_directory_tree(const char *starting_point) {
    char *current = NULL;
    size_t length = 0;
    int rc = 0;

    if (starting_point == NULL || starting_point[0] == '\0') {
        LOGERR("WARNING: No starting point given, begin at \".\"\n");
        starting_point = ".";
    }

    /* Work on a heap copy, so that every path of the loop below can be
     * released the same way as the entries coming from the stack. */
    length = strlen(starting_point);
    if ((current = calloc(length + 1, 1)) == NULL) {
        LOGERR("ERROR: Failed to allocate memory for the starting point.\n");
        return -1;
    }
    memcpy(current, starting_point, length);

    /* In case there are still entries on an existing pstack, take them with you */
    while (current != NULL) {
        DBGTRC("DEBUG: process directory %s next\n", current);
        rc = process_directory(current);
        /* Release before the error check, the path leaked on failure before */
        free(current);
        if (rc < 0) {
            return -1;
        }
        current = pstack_pop();
    }

    return 0;
}
'\0' : PATH_SEP)); + fname_in_path = (fullpath[path_length] == PATH_SEP) ? &(fullpath[path_length+1]) : &(fullpath[path_length]) ; + + if ((dir = opendir(path)) == NULL) { + LOGERR("ERROR: Failed to open directory '%s': %s (errno %d)\n", + path, strerror(errno), errno); + free(fullpath); + return -1; + } + + while ((de = readdir(dir)) != NULL) { + if (de->d_name[0] == '.' && option_show_hidden_entries == false) + continue; + + sprintf(fname_in_path, "%s", de->d_name); + DBGTRC("DEBUG: fullpath: '%s'\n", fullpath); + if (lstat(fullpath, &stat_res) != 0) { + LOGERR("ERROR: lstat call on '%s' failed: %s (errno %d)\n", + fullpath, strerror(errno), errno); + continue; + } + if (S_ISDIR(stat_res.st_mode)) { + type = 'D'; + if ((stack_entry=calloc(1, strlen(fullpath)+1)) == NULL) { + LOGERR("ERROR: memory allocation failed\n"); + /* saving cleanup, this error is a crash either way. With this + * location in the code may only be reached in a debugger. */ + return -1; + } + memcpy(stack_entry, fullpath, strlen(fullpath)); + pstack_push(stack_entry); + } else if (S_ISREG(stat_res.st_mode)) { + type = 'F'; + } else { + DBGTRC("DEBUG: Not a file or directory ... ignoring.\n"); + continue; + } + + /* Don't overwrite earlier runs */ + if (!kv_entry_exists(fullpath)) { + /* Ignore errors, missing entries shall show up in the error log + * and require manual intervention */ + kv_add_bool_type(fullpath, false, type); + } + } + free(fullpath); + closedir(dir); + + if (!kv_entry_exists(path)) { + if (path[0] == '.' 
&& option_show_hidden_entries == false) { + return 0; + } + kv_add_bool_type(path, true, 'D'); + } else { + kv_set_bool(path, true); + } + + return 0; +} + + diff --git a/src/duplicate_finder.c b/src/duplicate_finder.c new file mode 100644 index 0000000..bd679e9 --- /dev/null +++ b/src/duplicate_finder.c @@ -0,0 +1,49 @@ +/* SPDX-License-Identifier: Apache-2.0 */ + +/** + * Copyright 2026 Thorsten Töpper + * + * Keep track of files across different paths or filesystems with a sqlite db + * and identify duplicates. This utility should be used for housekeeping when + * spreading data across multiple FS / integrate old disks into newer setups. + * + * The DB stores SHA512 and SHA256 hashes calculated with the OpenSSL library, + * path and filenames and their corresponding stat() FS data. + * + * @file duplicate_finder.c + * + * vim:ts=4:sw=4:expandtab + */ + +#include <stdlib.h> + +#include "trace_macros.h" +#include "options.h" +#include "kv_manager.h" +#include "directory_scanner.h" + + +/*=========== DEFINES, CONSTANTS AND TYPES ===========*/ + +/*=========== GLOBAL VARIABLES ===========*/ + +/*=========== FUNCTIONS ===========*/ + +int main(int argc, char **argv) { + int path_index = 1; + + if (argc > 1) { + path_index = parse_arguments(argc, argv); + } + + /* TODO: as option */ + kv_open_storage("/tmp/duplicate_finder.gdbm"); + process_directory((path_index == argc) ? 
argv[path_index] : "."); + + kv_dump(stdout); + + kv_close_storage(); + + return EXIT_SUCCESS; +} + diff --git a/src/file_processor.c b/src/file_processor.c index 1cfed46..f4f9d05 100644 --- a/src/file_processor.c +++ b/src/file_processor.c @@ -21,7 +21,9 @@ #include "file_processor.h" #include "trace_macros.h" - +#include "hex_conversion.h" +#include "kv_manager.h" +#include "database_interaction.h" /*=========== DEFINES, CONSTANTS AND TYPES ===========*/ @@ -214,7 +216,7 @@ int process_file(struct df_fileinfo *info) { LOGERR("ERROR: Non-regular files are not processed.\n"); return -1; } - + if ((ctx_pkg = init_md_components()) == NULL) { LOGERR("ERROR: Failed to initialize/create md contexts to be used with %s\n", fullpath); @@ -270,23 +272,121 @@ int process_file(struct df_fileinfo *info) { destroy_md_components(ctx_pkg); return -1; } - memcpy(info->blake2, md_val, md_len); + convert_from_binary(md_val, md_len, info->hashes.blake2); if (EVP_DigestFinal_ex(ctx_pkg->mdctx_sha256, md_val, &md_len) != 1) { LOGERR("ERROR: Failed to finalize MD SHA256 of file '%s'\n", fullpath); destroy_md_components(ctx_pkg); return -1; } - memcpy(info->sha256, md_val, md_len); + convert_from_binary(md_val, md_len, info->hashes.sha256); if (EVP_DigestFinal_ex(ctx_pkg->mdctx_sha512, md_val, &md_len) != 1) { LOGERR("ERROR: Failed to finalize MD SHA512 of file '%s'\n", fullpath); destroy_md_components(ctx_pkg); return -1; } - memcpy(info->sha512, md_val, md_len); + convert_from_binary(md_val, md_len, info->hashes.sha512); return 0; } +/** + * Return a file info struct with path and filename fields filled. + * @param key the fullpath used as key in the gdbm. 
+ * @return NULL on failure + */ +struct df_fileinfo *prepare_fileinfo(char *key) { + char *tmp; + char *fname = NULL; + size_t plen=0; + struct df_fileinfo *info = NULL; + + if (key == NULL || key[0] == '\0') { + return NULL; + } + if ((fname=strrchr(key, '/')) == NULL) { + LOGERR("ERROR: path<->filename separation failed with '%s'\n", key); + return NULL; + } + + /* At this point the address of fname will always be equal or larger than keys */ + plen = (size_t) (fname - key); + fname++; /* drop the / */ + + if ((info=calloc(1, sizeof(struct df_fileinfo))) == NULL) { + return NULL; + } + + if ((tmp = calloc(plen+1, sizeof(char))) == NULL) { + free(info); + return NULL; + } + memcpy(tmp, key, plen); + info->path = tmp; + + if ((tmp = calloc(strlen(fname)+1, sizeof(char))) == NULL) { + free(info->path); + free(info); + return NULL; + } + memcpy(tmp, fname, plen); + info->name = tmp; + + return info; +} + +/** + * Iterate over the whole gdbm content. If an entry is an unprocessed file, + * process it, place the information in the database and set it as processed + * in the storage. + * @return 0 on success + * <0 on failure + */ +int process_gdbm_content() { + char *key, *tmpkey; + struct df_fileinfo *info; + int dbrc = 0; + + key = kv_first_key(); + while (key != NULL) { + /* file? already processed? 
*/ + if (kv_get_type(key) == 'D' || kv_get_bool(key)) { + tmpkey = key; + key = kv_next_key(tmpkey); + free(tmpkey); + continue; + } + + info = prepare_fileinfo(key); + if (info == NULL) { + LOGERR("ERROR: Preparing struct for key %s failed.\n", key); + return -1; + } + if (process_file(info) < 0) { + free(info->path); + free(info->name); + free(info); + return -1; + } + + dbrc = dbi_insert_fileinfo(info); + free(info->path); + free(info->name); + free(info); + if (dbrc < 0) { + LOGERR("ERROR: Aborting after database error.\n"); + return -1; + } + + + kv_set_bool(key, true); + + tmpkey = key; + key = kv_next_key(tmpkey); + free(tmpkey); + } + + return 0; +} diff --git a/src/kv_manager.c b/src/kv_manager.c new file mode 100644 index 0000000..df95faf --- /dev/null +++ b/src/kv_manager.c @@ -0,0 +1,347 @@ +/* SPDX-License-Identifier: Apache-2.0 */ + +/** + * Copyright 2026 Thorsten Töpper + * + * For now working with gdbm, which is to my knowledge ported to every + * important OS (GNU, BSD, UNIX (AIX, MacOS), Windows) and probably also + * many of the less spread ones. + * + * This key value storage contains the processable filesystem entries, + * meaning files and directories and whether they have been processed. + * + * The key is the path and fsentry name. + * + * The value a two byte char array with the first representing true or + * false via 'T' or 'F', the second the type directory 'D', file 'F' or + * '\0' if unknown. + * + * The unknown type should never show up. 
+ * + * @file kv_manager.c + * + * vim:ts=4:sw=4:expandtab + */ +#include <stdlib.h> +#include <string.h> +#include <stdbool.h> +#include <errno.h> + +#include <gdbm.h> + +#include "kv_manager.h" +#include "trace_macros.h" + + +/*=========== DEFINES, CONSTANTS AND TYPES ===========*/ + +/*=========== GLOBAL VARIABLES ===========*/ +GDBM_FILE gdbf; + +/*=========== FUNCTIONS ===========*/ +bool add_b_t_wrapped(char *key, bool value, char type, bool keep_original_type); + +/** + * Open the key value storage used by the manager + * + * @param fname filename of the storage, for in-memory place it on a tmpfs + * + * @return 0 on success + * -1 on wrong filename + * -2 on already open gdbm file + * -3 on failure when opening or creating the db file + */ +int kv_open_storage(char *fname) { + if (fname == NULL || fname[0] == '\0') { + LOGERR("ERROR: No valid filename\n"); + return false; + } + + if (gdbf != NULL) { + LOGERR("ERROR: Already a gdbm opened not switching\n"); + return false; + } + + /* Currently CLOEXEC is obsolete, as no exec calls are planned */ + gdbf = gdbm_open(fname, 0, GDBM_WRCREAT | GDBM_CLOEXEC | GDBM_XVERIFY, 0644, NULL); + if (gdbf == NULL) { + LOGERR("ERROR: Failed to open gdbm db: %s\n", + gdbm_strerror(gdbm_errno)); + return false; + } + return true; +} + +/** + * Close the currently storage + * + * @return true on success or if no storage was open + */ +bool kv_close_storage() { + if (gdbf == NULL) { + DBGTRC("DEBUG: No gdbm file open\n"); + return true; + } + if (gdbm_close(gdbf) != 0) { + /* Both gdbm_errno and errno are set, communicate the generic one */ + LOGERR("ERROR: Failed to close gdbm file correctly: %s (errno: %d)\n", + gdbm_strerror(gdbm_errno), errno); + return false; + } + gdbf = NULL; + return true; +} + +/** + * Local function to be wrapped by inserting/modifying ones. 
+ * + * @param key keystring + * @param state the bool part of the value + * @param type the fs entry type of the value + * @param keep_original_type bool whether the type parameter should be set or not. + * + * @return true on success + * false on failure + */ +bool add_b_t_wrapped(char *key, bool state, char type, bool keep_original_type) { + datum k, v; + int fcall_rc; + char s[2] = { 0, 0 }; + + if (key == NULL || key[0] == '\0') { + LOGERR("ERROR: No key given.\n"); + return false; + } + + k.dptr = key; + k.dsize = (int)strlen(key)+1; + + v = gdbm_fetch(gdbf, k); + + if ((v.dptr == NULL) && (gdbm_errno != GDBM_ITEM_NOT_FOUND)) { + LOGERR("ERROR: Failed to check key existence for key '%s': %s\n", + key, gdbm_strerror(gdbm_errno)); + return false; + } + + if (v.dptr == NULL) { + v.dptr = s; + v.dsize = 2; + } + + v.dptr[0] = (state) ? 'T' : 'F'; + if (!keep_original_type) { + v.dptr[1] = type; + } + + fcall_rc = gdbm_store(gdbf, k, v, GDBM_REPLACE); + + if (v.dptr != s) + free(v.dptr); + + if (fcall_rc < 0) { + LOGERR("ERROR: Failed store value for key '%s': %s\n", + key, strerror(gdbm_errno)); + return false; + } + + return true; +} + + +/** + * Add the entry for the given key. Replaces already existing keys + * + * @param state boolean + * @param type additional single byte information + * + * @return true on success + * false on failure + */ +bool kv_add_bool_type(char *key, bool state, char type) { + return add_b_t_wrapped(key, state, type, false); +} + +/** + * Key is stored with given boolean value. If a key already exists the value is changed + * else a new key is inserted. + * + * @param key key to set + * @param value value to set + * + * @return true on success + * false on failure + */ +bool kv_set_bool(char *key, bool value) { + return add_b_t_wrapped(key, value, 0, true); +} + +/** + * Get the bool part of the stored information. CAUTION: First check separately whether key exists! 
+ * + * @param key the key string + * @return the bool part of the value + */ +bool kv_get_bool(char *key) { + char *raw = kv_get_raw(key); + bool rc; + + if (raw == NULL) { + return false; + } + + rc = (raw[0] == 'T') ? true : false; + free(raw); + return rc; +} + +/** + * Get the type part of the stored information. CAUTION: First check separately whether key exists! + * + * @param key the key string + * @return the char represent the type + */ +char kv_get_type(char *key) { + char rc; + char *raw = kv_get_raw(key); + + if (raw == NULL) { + return 0; + } + + rc = raw[1]; + free(raw); + return rc; +} + + +/** + * Get the raw value. CAUTION: First check separately whether key exists! + * + * @param key the key string + * @return NULL in case of error or no value stored + * pointer to a short array which needs to be freed after processment + */ +char *kv_get_raw(char *key) { + datum k, v; + + if (key == NULL || key[0] == '\0') { + LOGERR("ERROR: No key given.\n"); + return NULL; + } + + k.dptr = key; + k.dsize = (int)strlen(key); + + v = gdbm_fetch(gdbf, k); + + if (gdbm_errno != GDBM_ITEM_NOT_FOUND) { + LOGERR("ERROR: Failed to fetch value for key '%s': %s\n", + key, gdbm_strerror(gdbm_errno)); + } + + return v.dptr; +} + + +/** + * Simple check if there's data for the key + * + * @param key the keystring + * @return bool whether entry exists + */ +bool kv_entry_exists(char *key) { + datum k; + + if (key == NULL || key[0] == '\0') { + LOGERR("ERROR: No key given.\n"); + return false; + } + + k.dptr = key; + k.dsize = (int)strlen(key); + + if (gdbm_exists(gdbf, k) == 1) { + return true; + } + if (gdbm_errno != GDBM_NO_ERROR) { + LOGERR("ERROR: Failed to verify existence of entry for key '%s': %s\n", + key, gdbm_strerror(gdbm_errno)); + } + return false; +} + +/** + * Wrapper to get the first key from the storage + * @return pointer to a key, needs to be freed manually + * NULL in case of error or an empty storage + */ +char *kv_first_key() { + datum k; + k = 
gdbm_firstkey(gdbf); + if ((k.dptr == NULL) && (gdbm_errno != GDBM_ITEM_NOT_FOUND)) { + LOGERR("ERROR: Request for first key failed: %s\n", + gdbm_strerror(gdbm_errno)); + } + return k.dptr; +} + +/** + * Wrapper to get the next key stored + * @return pointer to the key + * NULL if the end has been reached or an error occured + */ +char *kv_next_key(char *key) { + datum k, nk; + + if (key == NULL || key[0] == '\0') { + LOGERR("ERROR: No valid key given\n"); + return NULL; + } + + k.dptr = key; + k.dsize = (int)strlen(key); + + nk = gdbm_nextkey(gdbf, k); + + if ((nk.dptr == NULL) && (gdbm_errno != GDBM_ITEM_NOT_FOUND)) { + LOGERR("ERROR: Request for next key after '%s' failed: %s\n", + key, gdbm_strerror(gdbm_errno)); + } + + return nk.dptr; +} + +/** + * Dump the storage data to the given output stream in incomplete JSON format + * + * @param out the file stream, if NULL stdout is used + */ +void kv_dump(FILE *out) { + datum k, nk, v; + + if (out == NULL) { + out = stdout; + } + + k = gdbm_firstkey(gdbf); + while (k.dptr != NULL) { + v = gdbm_fetch(gdbf, k); + + if (v.dptr != NULL) { + fprintf(out, "{k='%s', v='%c%c'},\n", k.dptr, v.dptr[0], v.dptr[1]); + free(v.dptr); + } else { + if (gdbm_errno == GDBM_ITEM_NOT_FOUND) { + fprintf(out, "{k='%s', v=NULL},}\n", k.dptr); + } else { + LOGERR("ERROR: Failed to get value for '%s': %s\n", + k.dptr, gdbm_strerror(gdbm_errno)); + } + } + + nk = gdbm_nextkey(gdbf, k); + free (k.dptr); + k = nk; + } +} diff --git a/src/options.c b/src/options.c new file mode 100644 index 0000000..214a72c --- /dev/null +++ b/src/options.c @@ -0,0 +1,118 @@ +/* SPDX-License-Identifier: Apache-2.0 */ + +/* Copyright 2026 Thorsten Töpper + * + * vim:ts=4:sw=4:expandtab + */ +#include <stdio.h> +#include <stdlib.h> +#include <string.h> +#include <getopt.h> +#include <stdbool.h> +#include <ctype.h> + +#include "options.h" +#include "trace_macros.h" + + +/* === GLOBAL VARIABLES === */ +struct option long_options[] = { + { "help", no_argument, 0, 0 
}, + { "quiet", no_argument, 0, 0 }, + { "show-hidden-entries", no_argument, 0, 0 }, + { 0, 0, 0, 0 } +}; + +bool option_quiet = false; +bool option_show_hidden_entries = false; + +char *exec_name; + +/* === IMPLEMENTATION === */ + +void usage(char *executable) { + fprintf(stderr, "Call: %s OPTIONS path_to_open\n", executable); + fprintf(stderr, "\nOPTIONS are\n"); + /* long name, short name, optional argument, explanation */ + fprintf(stderr, " %-25s %2s %10s - %s\n", "--help", "-h", "", + "Show this message and exit"); + fprintf(stderr, " %-25s %2s %10s - %s\n", "--quiet", "-q", "", + "Don't print error messages or warnings"); + fprintf(stderr, " %-25s %2s %10s - %s\n", "--show-hidden-entries", "-a", "", + "Show hidden entries in the directory"); +} + + +void set_option(const char *option_name, char *option_argument) { + + DBGTRC("DEBUG: called with option_name '%s' and option_argument '%s'\n", + option_name, option_argument); + + if (option_name == NULL) + return; + + /* options WITHOUT arguments */ + if (strcmp("help", option_name) == 0) { + usage(exec_name); + exit(EXIT_SUCCESS); + } + + if (strcmp("quiet", option_name) == 0) { + option_quiet = true; + return; + } + + if (strcmp("show-hidden-entries", option_name) == 0) { + option_show_hidden_entries = true; + return; + } + + /* options WITH arguments */ + if (option_argument == NULL || option_argument[0] == '\0') { + LOGERR("ERROR: option_name %s with missing option_argument\n", + option_name); + exit(EXIT_FAILURE); + } + + LOGERR("ERROR: Option '%s' not recognized\n.", option_name); +} + + +int parse_arguments(int argc, char **argv) { + int c = 0, index; + /* exec_name is a file internal global variable for --help in set_option() */ + exec_name = argv[0]; + + while(1) { + index = 0; + c = getopt_long(argc, argv, "hqs", long_options, &index); + + if (c == -1) { + break; + } + + switch (c) { + case 0: + set_option(long_options[index].name, optarg); + break; + case 'h': + usage(exec_name); + exit(EXIT_SUCCESS); 
+ case 'q': + option_quiet = true; + break; + case 's': + option_show_hidden_entries = true; + break; + case '?': + break; + default: + LOGERR("ERROR: unrecognized option 0x%02X '%c'\n", c, c); + usage(exec_name); + exit(EXIT_FAILURE); + } + } + + return optind; +} + |
