aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorThorsten Töpper <atsutane@freethoughts.de>2026-02-18 21:51:53 +0100
committerThorsten Töpper <atsutane@freethoughts.de>2026-02-18 21:51:53 +0100
commitd513977a3566b14d9357906615d045d71741537f (patch)
tree3e707d2de9da71d98650fa8bb1b92ed11ab724ba
parenteed2d1323441861f2d41f0ecc0a72fcc9190fa5f (diff)
downloadduplicate_finder-d513977a3566b14d9357906615d045d71741537f.tar.gz
duplicate_finder-d513977a3566b14d9357906615d045d71741537f.tar.bz2
squash initial implementation
-rw-r--r--.gitignore4
-rw-r--r--CMakeLists.txt87
-rw-r--r--README.md3
-rwxr-xr-xbuild.sh8
-rw-r--r--include/database_interaction.h34
-rw-r--r--include/directory_scanner.h15
-rw-r--r--include/file_processor.h15
-rw-r--r--include/kv_manager.h25
-rw-r--r--include/options.h31
-rw-r--r--man/duplicate_finder.1.adoc41
-rw-r--r--src/database_interaction.c885
-rw-r--r--src/directory_scanner.c211
-rw-r--r--src/duplicate_finder.c49
-rw-r--r--src/file_processor.c110
-rw-r--r--src/kv_manager.c347
-rw-r--r--src/options.c118
16 files changed, 1978 insertions, 5 deletions
diff --git a/.gitignore b/.gitignore
new file mode 100644
index 0000000..c82077c
--- /dev/null
+++ b/.gitignore
@@ -0,0 +1,4 @@
+bin/*
+man/*.1
+man/*.gz
+.gdbinit
diff --git a/CMakeLists.txt b/CMakeLists.txt
new file mode 100644
index 0000000..4d2ed7c
--- /dev/null
+++ b/CMakeLists.txt
@@ -0,0 +1,87 @@
+# CMAKE_C_STANDARD accepts the value 17 only since CMake 3.21.
+cmake_minimum_required(VERSION 3.21...4.0)
+
+project(duplicate_finder
+    LANGUAGES C
+)
+
+# C11 would be sufficient for the code itself; C17 is requested as the minimum
+# so that a compiler which is too old is rejected at configure time instead of
+# failing later with obscure errors.
+set(CMAKE_C_STANDARD 17)
+set(CMAKE_C_STANDARD_REQUIRED ON)
+
+# Warning and hardening flags shared by every build variant below.
+add_compile_options(-Wall -Wextra -Wformat -Wformat=2 -Wconversion -Wimplicit-fallthrough -Werror=format-security
+    -Wbidi-chars=any -Werror=implicit -Werror=incompatible-pointer-types -Werror=int-conversion
+    -fno-delete-null-pointer-checks -fstack-clash-protection -fstack-protector-strong -fstrict-flex-arrays=3)
+
+set(SOURCE_DF src/duplicate_finder.c src/options.c src/kv_manager.c src/directory_scanner.c src/file_processor.c src/database_interaction.c)
+
+set(USED_LIBS gdbm crypto sqlite3)
+
+# Debug build: symbols, DEBUGBUILD define, warnings are errors.
+add_executable(duplicate_finder_debug ${SOURCE_DF})
+target_include_directories(duplicate_finder_debug PRIVATE include)
+target_compile_options(duplicate_finder_debug PRIVATE -g -DDEBUGBUILD -Werror)
+target_link_libraries(duplicate_finder_debug PRIVATE ${USED_LIBS})
+
+# Debug build with AddressSanitizer. The sanitizer runtime is pulled in by
+# passing -fsanitize=address to the link step as well, not by naming the
+# runtime library directly.
+add_executable(duplicate_finder_debug_asan ${SOURCE_DF})
+target_include_directories(duplicate_finder_debug_asan PRIVATE include)
+target_compile_options(duplicate_finder_debug_asan PRIVATE -g -DDEBUGBUILD -fsanitize=address -Werror)
+target_link_options(duplicate_finder_debug_asan PRIVATE -fsanitize=address)
+target_link_libraries(duplicate_finder_debug_asan PRIVATE ${USED_LIBS})
+
+# Non-debug build with AddressSanitizer.
+add_executable(duplicate_finder_asan ${SOURCE_DF})
+target_include_directories(duplicate_finder_asan PRIVATE include)
+target_compile_options(duplicate_finder_asan PRIVATE -fsanitize=address)
+target_link_options(duplicate_finder_asan PRIVATE -fsanitize=address)
+target_link_libraries(duplicate_finder_asan PRIVATE ${USED_LIBS})
+
+# Release build: optimized and hardened. Compiler flags and linker flags are
+# kept apart; linker flags given as compile options never reach the link step.
+add_executable(duplicate_finder ${SOURCE_DF})
+target_include_directories(duplicate_finder PRIVATE include)
+target_compile_options(duplicate_finder PRIVATE -O2 -U_FORTIFY_SOURCE -D_FORTIFY_SOURCE=3 -fPIE)
+target_link_options(duplicate_finder PRIVATE -pie -Wl,-z,noexecstack -Wl,--as-needed -Wl,--no-copy-dt-needed-entries)
+target_link_libraries(duplicate_finder PRIVATE ${USED_LIBS})
+
+
+# The man page generation...
+# Pipeline (all files are created in the build directory):
+#   man/duplicate_finder.1.adoc --asciidoc--> .1.xml --xmlto--> .1 --gzip--> .1.gz
+# If one of the tools is missing, the man page is skipped with a warning.
+
+find_program(ASCIIDOC_EXEC NAMES asciidoc)
+if (NOT ASCIIDOC_EXEC)
+    message(WARNING "Can't find asciidoc, skip manpage generation")
+    return()
+endif()
+
+find_program(XMLTO_EXEC NAMES xmlto)
+if (NOT XMLTO_EXEC)
+    message(WARNING "Can't find xmlto, skip manpage generation")
+    return()
+endif()
+
+find_program(COMPRESS_EXEC NAMES gzip)
+if (NOT COMPRESS_EXEC)
+    message(WARNING "Can't find gzip, skip manpage generation")
+    return()
+endif()
+
+
+set(MANPAGE duplicate_finder.1)
+set(MANPAGE_SOURCE ${CMAKE_CURRENT_SOURCE_DIR}/man/${MANPAGE}.adoc)
+set(ASCIIDOC_GENMAN ${ASCIIDOC_EXEC} -b docbook -d manpage)
+
+# DEPENDS on the AsciiDoc source so that edits to it regenerate the man page.
+add_custom_command(
+    OUTPUT ${MANPAGE}.xml
+    COMMAND ${ASCIIDOC_GENMAN} -o ${MANPAGE}.xml ${MANPAGE_SOURCE}
+    DEPENDS ${MANPAGE_SOURCE}
+    COMMENT "Create DocBook XML ${MANPAGE}.xml ..."
+    VERBATIM
+)
+
+set(XMLTO_GENMAN ${XMLTO_EXEC} man)
+add_custom_command(
+    OUTPUT ${MANPAGE}
+    COMMAND ${XMLTO_GENMAN} ${MANPAGE}.xml
+    DEPENDS ${MANPAGE}.xml
+    COMMENT "Create man page from DocBook XML ${MANPAGE}.xml ..."
+    VERBATIM
+)
+
+# -k keeps the uncompressed page: it is the declared OUTPUT of the rule above,
+# and deleting it would make every build rerun xmlto and gzip.
+add_custom_command(
+    OUTPUT ${MANPAGE}.gz
+    COMMAND ${COMPRESS_EXEC} -k -f -9 ${MANPAGE}
+    DEPENDS ${MANPAGE}
+    COMMENT "Compressing the manpage via gzip..."
+    VERBATIM
+)
+
+add_custom_target(man ALL
+    DEPENDS ${MANPAGE}.gz
+)
diff --git a/README.md b/README.md
index 0b6a16f..11a67e6 100644
--- a/README.md
+++ b/README.md
@@ -24,4 +24,7 @@ The latter requires `asciidoc`, `xmlto` and `gzip` for compression.
TODO
====
+- Code review: So far most of the code was written in the evening after work and dinner, so there are probably several places which can be improved, database\_interaction especially, as it contains a lot of repetitive code for the more specialized queries.
+- Documentation: goes parallel with the review. Also formatting of the related comments must be checked.
+- Pushing stuff into the sqlite DB is done, next step: Use it.
diff --git a/build.sh b/build.sh
new file mode 100755
index 0000000..e1c1208
--- /dev/null
+++ b/build.sh
@@ -0,0 +1,8 @@
+#!/bin/sh
+# I'm a lazy guy
+BDIR="/dev/shm/${PWD##*/}_cmakebuild/"
+mkdir -p bin
+cmake -S . -B "${BDIR}" && ( cd "${BDIR}" ; make) && \
+ find "${BDIR}" -maxdepth 1 -perm /u=x,g=x,o=x -type f -exec cp "{}" bin/ \; && \
+ cp -v "${BDIR}${PWD##*/}.1.gz" man/
+
diff --git a/include/database_interaction.h b/include/database_interaction.h
new file mode 100644
index 0000000..9ae0da7
--- /dev/null
+++ b/include/database_interaction.h
@@ -0,0 +1,34 @@
+/* SPDX-License-Identifier: Apache-2.0 */
+
+/* Copyright 2026 Thorsten Töpper
+ *
+ * vim:ts=4:sw=4:expandtab
+ */
+#ifndef DATABASE_INTERACTION_H
+#define DATABASE_INTERACTION_H
+
+/* bool and int64_t appear in the prototypes below; include their headers here
+ * so this header does not depend on the include order of its users. */
+#include <stdbool.h>
+#include <stdint.h>
+
+#include "file_processor.h"
+
+/*=========== FUNCTIONS ===========*/
+/* Open the database file, create missing tables and prepare all statements.
+ * Returns true on success. */
+bool dbi_open(char *filename);
+/* Finalize the prepared statements and close the database. */
+void dbi_close(void);
+
+/* The *_by_id string lookups return a calloc()ed copy of the stored text or
+ * NULL (not found or error).
+ * The *_by_name / *_by_pathname lookups return the id (>0), 0 if the text is
+ * not stored and -2 on error.
+ * The inserts return 0 if the text is stored afterwards (new or already
+ * present) and -1 on error. */
+char *dbi_select_filename_by_id(int64_t id);
+int64_t dbi_select_filename_by_name(const char *name);
+int dbi_insert_filename(const char *filename);
+
+char *dbi_select_path_by_id(int64_t id);
+int64_t dbi_select_path_by_pathname(const char *pathname);
+int dbi_insert_pathname(const char *path);
+
+/* dbi_select_hashes_by_id returns a calloc()ed struct or NULL.
+ * dbi_select_hashes_by_strings returns the id (>0), 0 if not found, -2 on error.
+ * dbi_insert_hashes returns 0 on success (or if already present), -2 on error. */
+struct df_hashstrings *dbi_select_hashes_by_id(int64_t id);
+int64_t dbi_select_hashes_by_strings(const char *blake2, const char *sha256, const char *sha512);
+int dbi_insert_hashes(const char *blake2, const char *sha256, const char *sha512);
+
+/* The fileinfo lookups return the id of the first matching row (>0), 0 if
+ * there is none and -2 on error. */
+int64_t dbi_select_fileinfo_by_hash_path_filename_ids(int64_t hash_id, int64_t path_id, int64_t filename_id);
+int64_t dbi_select_fileinfo_by_path_filename_ids(int64_t pname_id, int64_t fname_id);
+int dbi_insert_fileinfo(struct df_fileinfo *fi);
+
+/* Set last_seen of the fileinfo row to the current time; 0 on success, <0 on error. */
+int dbi_update_fileinfo_last_seen(int64_t id);
+int dbi_update_fileinfo_complete(struct df_fileinfo *fi, int64_t existing_id);
+#endif
diff --git a/include/directory_scanner.h b/include/directory_scanner.h
new file mode 100644
index 0000000..7b2e7d2
--- /dev/null
+++ b/include/directory_scanner.h
@@ -0,0 +1,15 @@
+/* SPDX-License-Identifier: Apache-2.0 */
+
+/* Copyright 2026 Thorsten Töpper
+ *
+ * vim:ts=4:sw=4:expandtab
+ */
+#ifndef DIRECTORY_SCANNER_H
+#define DIRECTORY_SCANNER_H
+
+/*=========== FUNCTIONS ===========*/
+/* NOTE(review): implementations live in src/directory_scanner.c; presumably
+ * process_directory() handles the entries of one directory while
+ * traverse_directory_tree() walks all subdirectories below starting_point.
+ * Return value conventions should be confirmed there. */
+int process_directory(char *path);
+int traverse_directory_tree(const char *starting_point);
+
+#endif
+
diff --git a/include/file_processor.h b/include/file_processor.h
index 8cfb6de..cdb30bb 100644
--- a/include/file_processor.h
+++ b/include/file_processor.h
@@ -11,6 +11,8 @@
#define DF_BYTE_SIZE_256 32
#define DF_BYTE_SIZE_512 64
+#define DF_STR_SIZE_256 64
+#define DF_STR_SIZE_512 128
/* Aliases for convenience, currently all algorithms are part of the default
* provider. */
@@ -19,6 +21,15 @@
#define DF_OSSL_SHA512 "SHA2-512"
/**
+ * The hashes in human-readable strings
+ */
+struct df_hashstrings {
+ /* NOTE(review): DF_STR_SIZE_256/512 are 64/128, which is exactly the number
+  * of hex digits of a 256/512 bit hash. If full-length hex strings are stored
+  * here there is no room for a terminating NUL, while database_interaction.c
+  * passes these fields to strlen()-based code - confirm and consider +1. */
+ char blake2[DF_STR_SIZE_512];
+ char sha256[DF_STR_SIZE_256];
+ char sha512[DF_STR_SIZE_512];
+};
+
+/**
* information about a file
* Contains filepath, stat() results, hash values of multiple algorithms.
* TODO: Organize the paths in a global pool (list/tree/map) and only refer there
@@ -27,15 +38,19 @@
struct df_fileinfo {
char *path; /**< pointer to the path of the file */
char *name; /**< pointer to the name of the file */
+#if 0
unsigned char blake2[DF_BYTE_SIZE_512]; /**< The BLAKE2-512 hash in binary form */
unsigned char sha256[DF_BYTE_SIZE_256]; /**< The SHA256 hash in binary form. */
unsigned char sha512[DF_BYTE_SIZE_512]; /**< The SHA512 hash in binary form. */
+#endif
+ struct df_hashstrings hashes; /**< The BLAKE2-512, SHA256 and SHA512 hash of the file */
struct stat statbuf; /**< Result of lstat() call. Symlinks are to be ignored and filtered out earlier. */
};
/*=========== FUNCTIONS ===========*/
int process_file(struct df_fileinfo *info);
+int process_gdbm_content();
#endif
diff --git a/include/kv_manager.h b/include/kv_manager.h
new file mode 100644
index 0000000..068841b
--- /dev/null
+++ b/include/kv_manager.h
@@ -0,0 +1,25 @@
+/* SPDX-License-Identifier: Apache-2.0 */
+
+/* Copyright 2026 Thorsten Töpper
+ *
+ * vim:ts=4:sw=4:expandtab
+ */
+#ifndef KV_MANAGER_H
+#define KV_MANAGER_H
+
+/* bool and FILE appear in the prototypes below; include their headers here so
+ * this header does not depend on the include order of its users. */
+#include <stdbool.h>
+#include <stdio.h>
+
+/*=========== FUNCTIONS ===========*/
+/* Functions without parameters are declared with (void) to get real
+ * prototypes; an empty parameter list leaves the arguments unchecked. */
+int kv_open_storage(char *fname);
+bool kv_close_storage(void);
+bool kv_add_bool_type(char *key, bool value, char type);
+bool kv_set_bool(char *key, bool value);
+bool kv_get_bool(char *key);
+char kv_get_type(char *key);
+char *kv_get_raw(char *key);
+bool kv_entry_exists(char *key);
+char *kv_first_key(void);
+char *kv_next_key(char *key);
+
+void kv_dump(FILE *out);
+
+#endif
+
diff --git a/include/options.h b/include/options.h
new file mode 100644
index 0000000..5a7747e
--- /dev/null
+++ b/include/options.h
@@ -0,0 +1,31 @@
+/* SPDX-License-Identifier: Apache-2.0 */
+
+/* Copyright 2026 Thorsten Töpper
+ *
+ * vim:ts=4:sw=4:expandtab
+ */
+
+#ifndef OPTIONS_H
+#define OPTIONS_H
+
+#include <stdbool.h>
+
+/* === DEFINITIONS === */
+/* Separator between path components; can be overridden at compile time. */
+/* TODO: if ported to other platforms, those preprocessor checks need to be extended */
+#ifndef PATH_SEP
+#define PATH_SEP '/'
+#endif
+
+
+/* === GLOBAL VARIABLES === */
+
+/* NOTE(review): presumably set by the --quiet and --show-hidden-entries
+ * command line options described in the man page; confirm in src/options.c. */
+extern bool option_quiet;
+extern bool option_show_hidden_entries;
+
+
+/* Command line handling, implemented in src/options.c. */
+int parse_arguments(int argc, char **argv);
+void set_option(const char *option_name, char *option_argument);
+void usage(char *executable);
+
+#endif
+
diff --git a/man/duplicate_finder.1.adoc b/man/duplicate_finder.1.adoc
new file mode 100644
index 0000000..8929fb3
--- /dev/null
+++ b/man/duplicate_finder.1.adoc
@@ -0,0 +1,41 @@
+duplicate_finder(1)
+===================
+
+== NAME
+
+duplicate_finder - find duplicate files
+
+
+== SYNOPSIS
+
+**duplicate_finder** [[*OPTION*]... directory]
+
+
+== DESCRIPTION
+duplicate_finder scans a given path and its subdirectories creating a database with information about found files.
+The information consists of the common information gained via stat from the filesystem, but files are also read
+and processed to get hashes with different algorithms provided by OpenSSL. When a scan happens information about
+which directories and files have been processed are stored in a gdbm key-value storage making interruptions easier
+to handle.
+
+
+== OPTIONS
+
+**--help**, **-h**::
+ Show this message and exit
+
+**--quiet**, **-q**::
+Suppress error and warning messages.
+
+**--show-hidden-entries**, **-s**::
+Show hidden entries in the directory
+
+
+== EXAMPLES
+
+duplicate_finder --show-hidden-entries /tmp
+
+
+
+== AUTHOR
+Thorsten Töpper
diff --git a/src/database_interaction.c b/src/database_interaction.c
new file mode 100644
index 0000000..e31d197
--- /dev/null
+++ b/src/database_interaction.c
@@ -0,0 +1,885 @@
+/* SPDX-License-Identifier: Apache-2.0 */
+
+/**
+ * Copyright 2026 Thorsten Töpper
+ *
+ * The database contains those tables:
+ * - filenames
+ * -> id INTEGER PRIMARY KEY
+ * -> name TEXT
+ * - paths
+ * -> id INTEGER PRIMARY KEY
+ * -> pathname TEXT
+ * - fileinfo
+ * -> id INTEGER PRIMARY KEY
+ * -> p_id INTEGER
+ * -> fn_id INTEGER
+ * -> h_id INTEGER
+ * -> size INTEGER
+ * -> last_seen INTEGER
+ * -> stat_struct BLOB
+ * - hashes
+ * -> id INTEGER PRIMARY KEY
+ * -> blake2 TEXT
+ * -> sha256 TEXT
+ * -> sha512 TEXT
+ *
+ * @file database_interaction.c
+ *
+ * vim:ts=4:sw=4:expandtab
+ */
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <unistd.h>
+#include <stdbool.h>
+#include <stdint.h>
+#include <errno.h>
+#include <time.h>
+
+#include <sqlite3.h>
+
+#include "options.h"
+#include "database_interaction.h"
+#include "trace_macros.h"
+#include "file_processor.h"
+
+
+/*=========== DEFINES, CONSTANTS AND TYPES ===========*/
+
+/*=========== GLOBAL VARIABLES ===========*/
+sqlite3 *dbconn = NULL;
+
+/* The statements will be wrapped via a function dbi_STATEMENTNAME() for the outside */
+sqlite3_stmt *select_filename_by_id,
+ *select_filename_by_name,
+ *select_path_by_id,
+ *select_path_by_pathname,
+ *select_hashes_by_id,
+ *select_hashes_by_strings,
+ *select_fileinfo_by_id,
+ *select_fileinfo_by_id_resolved,
+ *select_fileinfo_by_path_id,
+ *select_fileinfo_by_filename_id,
+ *select_fileinfo_by_path_filename_ids,
+ *select_fileinfo_by_hash_path_filename_ids,
+ *select_fileinfo_by_hash_id;
+
+sqlite3_stmt *insert_filename,
+ *insert_pathname,
+ *insert_hashes,
+ *insert_fileinfo;
+
+sqlite3_stmt *update_fileinfo_last_seen,
+ *update_fileinfo_complete;
+
+sqlite3_stmt *delete_fileinfo_by_id;
+
+/*=========== FUNCTIONS ===========*/
+void create_tables();
+int prepare_statements();
+char *select_string_by_int(sqlite3_stmt *st, int64_t id);
+
+/* Writing this block way too often */
+#define DBCONN_CHECK(x) \
+ if (dbconn==NULL){ LOGERR("ERROR: No database connection.\n");\
+ return x; }
+
+
+
+/**
+ * Open (or create) the SQLite database, make sure the tables exist and
+ * prepare the statements used by the other dbi_* functions.
+ *
+ * @param filename path of the database file, must be a non-empty string
+ * @return true on success
+ *         false if the filename is invalid, a database is already open,
+ *         the file can't be opened or a statement can't be prepared
+ */
+bool dbi_open(char *filename) {
+    if (filename == NULL || filename[0] == '\0') {
+        LOGERR("ERROR: No valid filename given.\n");
+        return false;
+    }
+
+    if (dbconn != NULL) {
+        LOGERR("ERROR: There's already an open database\n");
+        return false;
+    }
+
+    if (sqlite3_open(filename, &dbconn) != SQLITE_OK) {
+        LOGERR("ERROR: Failed to open database: %s\n",
+            sqlite3_errmsg(dbconn));
+        /* Even a failed sqlite3_open() may hand out a handle that must be closed. */
+        sqlite3_close(dbconn);
+        dbconn = NULL;
+        return false;
+    }
+
+    create_tables();
+    if (prepare_statements() != 0) {
+        /* Release the statements prepared so far and the connection.
+         * Otherwise dbconn stays set and every later dbi_open() call is
+         * rejected because a database is "already open". */
+        dbi_close();
+        return false;
+    }
+
+    return true;
+}
+
+/**
+ * Finalize all prepared statements and close the database connection.
+ * Logs an error and does nothing else when no database is open.
+ */
+void dbi_close() {
+    /* sqlite3_finalize() accepts NULL, so statements which were never
+     * prepared are harmless here. */
+#define LOCAL_FINALIZE(x) { sqlite3_finalize(x); x=NULL; }
+
+    DBCONN_CHECK();
+
+    LOCAL_FINALIZE(select_filename_by_id);
+    LOCAL_FINALIZE(select_filename_by_name);
+    LOCAL_FINALIZE(select_path_by_id);
+    LOCAL_FINALIZE(select_path_by_pathname);
+    LOCAL_FINALIZE(select_hashes_by_id);
+    LOCAL_FINALIZE(select_hashes_by_strings);
+    LOCAL_FINALIZE(select_fileinfo_by_id);
+    LOCAL_FINALIZE(select_fileinfo_by_id_resolved);
+    LOCAL_FINALIZE(select_fileinfo_by_path_id);
+    LOCAL_FINALIZE(select_fileinfo_by_filename_id);
+    LOCAL_FINALIZE(select_fileinfo_by_path_filename_ids);
+    /* This statement is prepared in prepare_statements() as well; leaving it
+     * unfinalized makes sqlite3_close() fail with SQLITE_BUSY. */
+    LOCAL_FINALIZE(select_fileinfo_by_hash_path_filename_ids);
+    LOCAL_FINALIZE(select_fileinfo_by_hash_id);
+
+    LOCAL_FINALIZE(insert_filename);
+    LOCAL_FINALIZE(insert_pathname);
+    LOCAL_FINALIZE(insert_hashes);
+    LOCAL_FINALIZE(insert_fileinfo);
+
+    LOCAL_FINALIZE(update_fileinfo_last_seen);
+    LOCAL_FINALIZE(update_fileinfo_complete);
+
+    LOCAL_FINALIZE(delete_fileinfo_by_id);
+
+#undef LOCAL_FINALIZE
+
+    sqlite3_close(dbconn);
+    dbconn = NULL;
+}
+
+
+/**
+ * Create the later used tables if they don't exist yet.
+ * A failure is logged; the remaining tables are still attempted.
+ */
+void create_tables() {
+    /* hashes: no UNIQUE here, as even for the rare case of a hash collision
+     * in a single algorithm, all three won't collide at the same time. */
+    static const struct {
+        const char *name;   /* table name, only used for the error message */
+        const char *sql;
+    } tables[] = {
+        { "filenames",
+          "CREATE TABLE IF NOT EXISTS filenames ( id INTEGER PRIMARY KEY AUTOINCREMENT, name TEXT UNIQUE);" },
+        { "paths",
+          "CREATE TABLE IF NOT EXISTS paths ( id INTEGER PRIMARY KEY AUTOINCREMENT, pathname TEXT UNIQUE);" },
+        { "hashes",
+          "CREATE TABLE IF NOT EXISTS hashes ( id INTEGER PRIMARY KEY AUTOINCREMENT, blake2 TEXT, sha256 TEXT, sha512 TEXT );" },
+        { "fileinfo",
+          "CREATE TABLE IF NOT EXISTS fileinfo ( id INTEGER PRIMARY KEY, p_id INTEGER, "
+          "fn_id INTEGER, h_id INTEGER, size INTEGER, last_seen INTEGER, stat_struct BLOB, "
+          "FOREIGN KEY(p_id) REFERENCES paths(id), FOREIGN KEY(fn_id) REFERENCES filenames(id), "
+          "FOREIGN KEY(h_id) REFERENCES hashes(id));" },
+    };
+
+    for (size_t i = 0; i < sizeof(tables) / sizeof(tables[0]); i++) {
+        char *err = NULL;
+
+        /* Check the return code too: an error is not guaranteed to come
+         * with a message in err. */
+        if (sqlite3_exec(dbconn, tables[i].sql, NULL, NULL, &err) != SQLITE_OK || err != NULL) {
+            LOGERR("ERROR: Creation of table %s failed: %s\n", tables[i].name,
+                (err != NULL) ? err : sqlite3_errmsg(dbconn));
+        }
+        sqlite3_free(err); /* no-op for NULL */
+    }
+}
+
+/* Pairing of an SQL text with the global statement handle it is compiled into. */
+struct local_stmt_spec {
+    const char *sql;
+    sqlite3_stmt **handle;
+};
+
+/* Compile the n statements of specs in order. *counter is incremented before
+ * each attempt and only used to number the statement in the error message.
+ * Returns 0 if all were prepared, -1 on the first failure. */
+static int prepare_stmt_group(const struct local_stmt_spec *specs, size_t n, int *counter) {
+    for (size_t i = 0; i < n; i++) {
+        (*counter)++;
+        if (sqlite3_prepare_v2(dbconn, specs[i].sql, -1, specs[i].handle, NULL) != SQLITE_OK) {
+            LOGERR("ERROR: Failed to prepare statement %d '%s': %s\n",
+                *counter, specs[i].sql, sqlite3_errmsg(dbconn));
+            return -1;
+        }
+    }
+    return 0;
+}
+
+/**
+ * Prepare all statements used by this module on the open connection.
+ * Error handling in KISS: stop at the first statement that fails.
+ * @return 0 if ok, -1 if a statement could not be prepared
+ */
+int prepare_statements() {
+    const struct local_stmt_spec selects[] = {
+        { "SELECT name FROM filenames WHERE id = ? ;", &select_filename_by_id },
+        { "SELECT id FROM filenames WHERE name = ? ;", &select_filename_by_name },
+        { "SELECT pathname FROM paths WHERE id = ? ;", &select_path_by_id },
+        { "SELECT id FROM paths WHERE pathname = ? ;", &select_path_by_pathname },
+        { "SELECT blake2, sha256, sha512 FROM hashes WHERE id = ? ;", &select_hashes_by_id },
+        { "SELECT id FROM hashes WHERE blake2 = ? AND sha256 = ? AND sha512 = ? ;", &select_hashes_by_strings },
+        { "SELECT * FROM fileinfo WHERE id = ? ;", &select_fileinfo_by_id },
+        { "SELECT * FROM fileinfo WHERE p_id = ? ;", &select_fileinfo_by_path_id },
+        { "SELECT * FROM fileinfo WHERE fn_id = ? ;", &select_fileinfo_by_filename_id },
+        { "SELECT * FROM fileinfo WHERE p_id = ? AND fn_id = ? ;", &select_fileinfo_by_path_filename_ids },
+        { "SELECT * FROM fileinfo WHERE h_id = ? AND p_id = ? AND fn_id = ? ;", &select_fileinfo_by_hash_path_filename_ids },
+        { "SELECT * FROM fileinfo WHERE h_id = ? ;", &select_fileinfo_by_hash_id },
+    };
+    const struct local_stmt_spec modifications[] = {
+        /* INSERT */
+        { "INSERT INTO filenames (name) VALUES (?);", &insert_filename },
+        { "INSERT INTO paths (pathname) VALUES (?);", &insert_pathname },
+        { "INSERT INTO hashes (blake2, sha256, sha512) VALUES (@blake2, @sha256, @sha512);", &insert_hashes },
+        { "INSERT INTO fileinfo (p_id, fn_id, h_id, size, last_seen, stat_struct) "
+          "VALUES (?, ?, ?, ?, ?, ?);", &insert_fileinfo },
+        /* UPDATE */
+        { "UPDATE fileinfo SET last_seen = @time WHERE id = @id ;", &update_fileinfo_last_seen },
+        { "UPDATE fileinfo SET p_id = @pid , fn_id = @fnid , h_id = @hid , "
+          "size = @sz , last_seen = @ls, stat_struct = @stat WHERE id = @id ;", &update_fileinfo_complete },
+        /* DELETE */
+        { "DELETE FROM fileinfo WHERE id = ? ;", &delete_fileinfo_by_id },
+    };
+    int counter = 0;
+
+    if (prepare_stmt_group(selects, sizeof(selects) / sizeof(selects[0]), &counter) != 0) {
+        return -1;
+    }
+
+    /* TODO: so far the only query with JOINs or masking it in another way? Ignore this frontend query for now.*/
+    select_fileinfo_by_id_resolved = NULL;
+
+    if (prepare_stmt_group(modifications, sizeof(modifications) / sizeof(modifications[0]), &counter) != 0) {
+        return -1;
+    }
+
+    return 0;
+}
+
+/**
+ * To be wrapped for simple SELECT text ... WHERE PK = id; statements.
+ * @param st A prepared statement with one integer parameter, returning a
+ *           text in column 0
+ * @param id A 64 bit integer used as primary key in the query, must be > 0.
+ * @return NULL in case of error, if no row matches or if the stored value is NULL
+ *         otherwise a calloc()ed copy of the database content
+ */
+char *select_string_by_int(sqlite3_stmt *st, int64_t id) {
+    char *result = NULL;
+    int strc = 0;
+    const char *text = NULL;
+    size_t len = 0;
+
+    DBCONN_CHECK(NULL);
+
+    if (st == NULL) {
+        LOGERR("ERROR: No prepared statement.\n");
+        return NULL;
+    }
+
+    /* int64_t is not "long" on every platform, so print it via long long. */
+    if (id < 1) {
+        LOGERR("ERROR: Invalid id %lld\n", (long long)id);
+        return NULL;
+    }
+
+    if (sqlite3_bind_int64(st, 1, id) != SQLITE_OK) {
+        LOGERR("ERROR: Failed to bind id %lld to prepared statement: %s\n", (long long)id, sqlite3_errmsg(dbconn));
+        return NULL;
+    }
+
+    strc = sqlite3_step(st);
+    /* Don't forget: the sqlite3_reset() call must be executed! */
+    if (strc == SQLITE_ROW) {
+        /* The pointer is only valid until the statement is reset, hence the
+         * copy. It is NULL for an SQL NULL value (or on out of memory). */
+        text = (const char *)sqlite3_column_text(st, 0);
+        if (text == NULL) {
+            LOGERR("ERROR: Failed step: %s\n", "query returned no text");
+        } else {
+            len = strlen(text);
+            if ((result = calloc(len + 1, sizeof(char))) == NULL) {
+                LOGERR("ERROR: Failed to allocate memory for copy of query result.\n");
+            } else {
+                memcpy(result, text, len);
+            }
+        }
+    } else if (strc != SQLITE_DONE) {
+        LOGERR("ERROR: Failed step: %s\n", sqlite3_errmsg(dbconn));
+    }
+
+    sqlite3_clear_bindings(st);
+    sqlite3_reset(st);
+
+    return result;
+}
+
+/**
+ * Insert one text value through a prepared single-parameter INSERT, unless
+ * the optional check function reports that the value is already stored.
+ * @param st prepared INSERT statement with one text parameter
+ * @param check_function lookup for the text, a result > 0 means "exists";
+ *                       may be NULL to skip the check
+ * @param text the value to insert
+ * @return 0 if ok (inserted or already existing), <0 in case of error
+ */
+int insert_text(sqlite3_stmt *st, int64_t (*check_function)(const char*), const char *text) {
+    int step_result;
+    int rc = 0;
+
+    DBCONN_CHECK(-1);
+
+    if (st == NULL) {
+        LOGERR("ERROR: No prepared statement.\n");
+        return -1;
+    }
+
+    if (text == NULL) {
+        LOGERR("ERROR: No content to insert.\n");
+        return -1;
+    }
+
+    /* Nothing to do when the entry already exists. */
+    if (check_function != NULL) {
+        if (check_function(text) > 0) {
+            return 0;
+        }
+    }
+
+    if (SQLITE_OK != sqlite3_bind_text(st, 1, text, -1, SQLITE_TRANSIENT)) {
+        LOGERR("ERROR: Failed to bind text '%s' to statement: %s\n", text, sqlite3_errmsg(dbconn));
+        return -1;
+    }
+
+    step_result = sqlite3_step(st);
+    if (step_result != SQLITE_DONE) {
+        LOGERR("ERROR: Failed to insert text '%s' into db: %s\n", text, sqlite3_errmsg(dbconn));
+        rc = -1;
+    }
+
+    /* Leave the statement clean for the next call. */
+    sqlite3_clear_bindings(st);
+    sqlite3_reset(st);
+
+    return rc;
+}
+
+/**
+ * To be wrapped for simple SELECT id ... WHERE COL = string; statements, COL being UNIQUE.
+ * @param st A prepared statement with one text parameter, returning an
+ *           integer in column 0
+ * @param s a non-empty string bound to the WHERE in the statement
+ * @return -2 in case of error
+ *         0 if not found
+ *         otherwise the integer of the first result row (the id in the database)
+ */
+int64_t select_int_by_string(sqlite3_stmt *st, const char *s) {
+    int64_t result = -1;
+    int strc = 0;
+
+    DBCONN_CHECK(-2);
+
+    if (st == NULL) {
+        LOGERR("ERROR: No prepared statement.\n");
+        return -2;
+    }
+
+    if (s == NULL || s[0] == '\0') {
+        /* Never hand a NULL pointer to %s. */
+        LOGERR("ERROR: Invalid string %s\n", (s != NULL) ? s : "(null)");
+        return -2;
+    }
+
+    /* Avoid conflict with earlier calls */
+    sqlite3_clear_bindings(st);
+    sqlite3_reset(st);
+
+    if (sqlite3_bind_text(st, 1, s, -1, SQLITE_TRANSIENT) != SQLITE_OK) {
+        LOGERR("ERROR: Failed to bind string %s to prepared statement: %s\n", s, sqlite3_errmsg(dbconn));
+        return -2;
+    }
+
+    strc = sqlite3_step(st);
+    /* Don't forget: the sqlite3_reset() call must be executed! */
+    if (strc == SQLITE_ROW) {
+        result = (int64_t) sqlite3_column_int64(st, 0);
+    } else if (strc != SQLITE_DONE) {
+        LOGERR("ERROR: Failed step: %s\n", sqlite3_errmsg(dbconn));
+        result = -2;
+    } else { /* SQLITE_DONE => EMPTY */
+        DBGTRC("DEBUG: Combination not found in db\n");
+        result = 0;
+    }
+
+    sqlite3_clear_bindings(st);
+    sqlite3_reset(st);
+
+    return result;
+}
+
+
+
+/* Fetch the name stored in table filenames under the given id. */
+char *dbi_select_filename_by_id(int64_t id) {
+    char *name = select_string_by_int(select_filename_by_id, id);
+
+    return name;
+}
+
+/* Fetch the pathname stored in table paths under the given id. */
+char *dbi_select_path_by_id(int64_t id) {
+    char *pathname = select_string_by_int(select_path_by_id, id);
+
+    return pathname;
+}
+
+/* Look up the id of a filename in table filenames. */
+int64_t dbi_select_filename_by_name(const char *name) {
+    int64_t id = select_int_by_string(select_filename_by_name, name);
+
+    return id;
+}
+
+/* Look up the id of a pathname in table paths. */
+int64_t dbi_select_path_by_pathname(const char *pathname) {
+    int64_t id = select_int_by_string(select_path_by_pathname, pathname);
+
+    return id;
+}
+
+
+/* Store a filename in table filenames unless it is already present. */
+int dbi_insert_filename(const char *filename) {
+    int rc = insert_text(insert_filename, dbi_select_filename_by_name, filename);
+
+    return rc;
+}
+
+/* Store a pathname in table paths unless it is already present. */
+int dbi_insert_pathname(const char *path) {
+    int rc = insert_text(insert_pathname, dbi_select_path_by_pathname, path);
+
+    return rc;
+}
+
+/**
+ * Look up the id of a hash triple in table hashes.
+ * @param blake2 non-empty BLAKE2 hash string
+ * @param sha256 non-empty SHA256 hash string
+ * @param sha512 non-empty SHA512 hash string
+ * @return -2 in case of error
+ *         0 if the combination is not stored
+ *         >0 the id in the database
+ */
+int64_t dbi_select_hashes_by_strings(const char *blake2, const char *sha256, const char *sha512) {
+    const char *names[] = { "blake2", "sha256", "sha512" };
+    const char *values[] = { blake2, sha256, sha512 };
+    int64_t result = 0;
+    int strc = 0;
+    sqlite3_stmt *st = select_hashes_by_strings;
+
+    DBCONN_CHECK(-2);
+
+    if (blake2 == NULL || sha256 == NULL || sha512 == NULL ||
+        blake2[0] == '\0' || sha256[0] == '\0' || sha512[0] == '\0') {
+        /* Never hand a NULL pointer to %s. */
+        LOGERR("ERROR: Invalid argument: blake2=%s sha256=%s sha512=%s\n",
+            (blake2 != NULL) ? blake2 : "(null)",
+            (sha256 != NULL) ? sha256 : "(null)",
+            (sha512 != NULL) ? sha512 : "(null)");
+        return -2;
+    }
+
+    /* Avoid conflict with earlier calls */
+    sqlite3_clear_bindings(st);
+    sqlite3_reset(st);
+
+    /* The statement uses anonymous '?' parameters, which have no name that
+     * sqlite3_bind_parameter_index() could find (it returns 0 and the bind
+     * fails with SQLITE_RANGE). Bind by position instead: 1=blake2,
+     * 2=sha256, 3=sha512, the order of the WHERE clause. */
+    for (int i = 0; i < 3; i++) {
+        if (sqlite3_bind_text(st, i + 1, values[i], -1, SQLITE_TRANSIENT) != SQLITE_OK) {
+            LOGERR("ERROR: Failed to bind field %s to prepared statement: %s\n", names[i], sqlite3_errmsg(dbconn));
+            return -2;
+        }
+    }
+
+    strc = sqlite3_step(st);
+    /* Don't forget: the sqlite3_reset() call must be executed! */
+    if (strc == SQLITE_ROW) {
+        result = (int64_t) sqlite3_column_int64(st, 0);
+    } else if (strc != SQLITE_DONE) {
+        LOGERR("ERROR: Failed step: %s\n", sqlite3_errmsg(dbconn));
+        result = -2;
+    } else { /* SQLITE_DONE => EMPTY */
+        DBGTRC("DEBUG: Combination not found in db\n");
+        result = 0;
+    }
+
+    sqlite3_clear_bindings(st);
+    sqlite3_reset(st);
+
+    return result;
+}
+
+/**
+ * Store a hash triple in table hashes unless the combination already exists.
+ * @param blake2 non-empty BLAKE2 hash string
+ * @param sha256 non-empty SHA256 hash string
+ * @param sha512 non-empty SHA512 hash string
+ * @return 0 if ok (inserted or already existing), -2 in case of error
+ */
+int dbi_insert_hashes(const char *blake2, const char *sha256, const char *sha512) {
+    const char *names[] = { "blake2", "sha256", "sha512" };
+    const char *values[] = { blake2, sha256, sha512 };
+    int rc = 0;
+    int strc = 0;
+    sqlite3_stmt *st = insert_hashes;
+
+    DBCONN_CHECK(-2);
+
+    if (blake2 == NULL || sha256 == NULL || sha512 == NULL ||
+        blake2[0] == '\0' || sha256[0] == '\0' || sha512[0] == '\0') {
+        /* Never hand a NULL pointer to %s. */
+        LOGERR("ERROR: Invalid argument: blake2=%s sha256=%s sha512=%s\n",
+            (blake2 != NULL) ? blake2 : "(null)",
+            (sha256 != NULL) ? sha256 : "(null)",
+            (sha512 != NULL) ? sha512 : "(null)");
+        return -2;
+    }
+
+    if (dbi_select_hashes_by_strings(blake2, sha256, sha512) > 0) {
+        return 0;
+    }
+
+    /* Avoid conflict with earlier calls */
+    sqlite3_clear_bindings(st);
+    sqlite3_reset(st);
+
+    /* Bind by position (1=blake2, 2=sha256, 3=sha512, the order in which the
+     * parameters appear in the INSERT). A lookup by name would need the
+     * complete parameter name including its prefix ("@blake2"); without the
+     * prefix sqlite3_bind_parameter_index() returns 0 and the bind fails. */
+    for (int i = 0; i < 3; i++) {
+        if (sqlite3_bind_text(st, i + 1, values[i], -1, SQLITE_TRANSIENT) != SQLITE_OK) {
+            LOGERR("ERROR: Failed to bind field %s to prepared statement: %s\n", names[i], sqlite3_errmsg(dbconn));
+            return -2;
+        }
+    }
+
+    strc = sqlite3_step(st);
+    if (strc != SQLITE_DONE) {
+        LOGERR("ERROR: Failed to insert hashes (blake2=%s, sha256=%s, sha512=%s) into db: %s\n",
+            blake2, sha256, sha512, sqlite3_errmsg(dbconn));
+        rc = -2;
+    } else {
+        rc = 0;
+    }
+
+    sqlite3_clear_bindings(st);
+    sqlite3_reset(st);
+
+    return rc;
+}
+
+/* Copy text column col of the current result row into the fixed-size buffer
+ * dst. Returns false if the column holds no text or the text is longer than
+ * dst_size. A text of exactly dst_size bytes fills the buffer completely,
+ * i.e. without a terminating NUL. */
+static bool copy_hash_column(sqlite3_stmt *st, int col, char *dst, size_t dst_size) {
+    const char *text = (const char *)sqlite3_column_text(st, col);
+    size_t len;
+
+    if (text == NULL) {
+        return false;
+    }
+
+    len = strlen(text);
+    if (len > dst_size) {
+        return false;
+    }
+
+    memcpy(dst, text, len);
+    return true;
+}
+
+/**
+ * Fetch the hash strings stored in table hashes under the given id.
+ * @param id primary key in table hashes, must be > 0
+ * @return NULL in case of error, if no row matches or if a stored value does
+ *         not fit into struct df_hashstrings
+ *         otherwise a calloc()ed struct holding copies of the three strings
+ */
+struct df_hashstrings *dbi_select_hashes_by_id(int64_t id) {
+    struct df_hashstrings *result = NULL;
+    int strc = 0;
+    sqlite3_stmt *st = select_hashes_by_id;
+
+    DBCONN_CHECK(NULL);
+
+    /* int64_t is not "long" on every platform, so print it via long long. */
+    if (id < 1) {
+        LOGERR("ERROR: invalid id %lld\n", (long long int)id);
+        return NULL;
+    }
+
+    if (sqlite3_bind_int64(st, 1, id) != SQLITE_OK) {
+        LOGERR("ERROR: Failed to bind id %lld to prepared statement: %s\n", (long long int)id, sqlite3_errmsg(dbconn));
+        return NULL;
+    }
+
+    strc = sqlite3_step(st);
+    /* Don't forget: the sqlite3_reset() call must be executed! */
+    if (strc == SQLITE_ROW) {
+        if ((result = calloc(1, sizeof(struct df_hashstrings))) == NULL) {
+            LOGERR("ERROR: Failed to allocate memory for copy of query result.\n");
+        } else if (!copy_hash_column(st, 0, result->blake2, sizeof(result->blake2)) ||
+                   !copy_hash_column(st, 1, result->sha256, sizeof(result->sha256)) ||
+                   !copy_hash_column(st, 2, result->sha512, sizeof(result->sha512))) {
+            /* Database content is never trusted to fit the fixed-size fields. */
+            LOGERR("ERROR: Failed step: %s\n", "stored hash is missing or too long");
+            free(result);
+            result = NULL;
+        }
+    } else if (strc != SQLITE_DONE) {
+        LOGERR("ERROR: Failed step: %s\n", sqlite3_errmsg(dbconn));
+    }
+
+    sqlite3_clear_bindings(st);
+    sqlite3_reset(st);
+    return result;
+}
+
+
+/**
+ * Look up the fileinfo row matching a combination of hash, path and filename ids.
+ * @param hash_id id in table hashes, must be > 0
+ * @param path_id id in table paths, must be > 0
+ * @param filename_id id in table filenames, must be > 0
+ * @return -2 in case of error
+ *         0 if the combination is not stored
+ *         otherwise column 0 (the id) of the first matching row
+ */
+int64_t dbi_select_fileinfo_by_hash_path_filename_ids(int64_t hash_id, int64_t path_id, int64_t filename_id) {
+    int64_t result = 0;
+    int strc = 0;
+    sqlite3_stmt *st = select_fileinfo_by_hash_path_filename_ids;
+
+    DBCONN_CHECK(-2);
+
+    /* Avoid conflict with earlier calls */
+    sqlite3_clear_bindings(st);
+    sqlite3_reset(st);
+
+    /* int64_t is not "long" on every platform, so the ids are printed via
+     * long long. */
+    if ( hash_id < 1 || path_id < 1 || filename_id < 1 ) {
+        LOGERR("ERROR: At least one invalid id: hashes %lld | path %lld | filename %lld\n",
+            (long long)hash_id, (long long)path_id, (long long)filename_id);
+        return -2;
+    }
+
+    if (sqlite3_bind_int64(st, 1, hash_id) != SQLITE_OK) {
+        LOGERR("ERROR: Failed to bind hash_id %lld to prepared statement: %s\n", (long long)hash_id, sqlite3_errmsg(dbconn));
+        return -2;
+    }
+
+    if (sqlite3_bind_int64(st, 2, path_id) != SQLITE_OK) {
+        LOGERR("ERROR: Failed to bind path_id %lld to prepared statement: %s\n", (long long)path_id, sqlite3_errmsg(dbconn));
+        return -2;
+    }
+
+    if (sqlite3_bind_int64(st, 3, filename_id) != SQLITE_OK) {
+        LOGERR("ERROR: Failed to bind filename_id %lld to prepared statement: %s\n", (long long)filename_id, sqlite3_errmsg(dbconn));
+        return -2;
+    }
+
+    strc = sqlite3_step(st);
+    /* Don't forget: the sqlite3_reset() call must be executed! */
+    if (strc == SQLITE_ROW) {
+        result = (int64_t) sqlite3_column_int64(st, 0);
+    } else if (strc != SQLITE_DONE) {
+        LOGERR("ERROR: Failed step: %s\n", sqlite3_errmsg(dbconn));
+        result = -2;
+    } else { /* SQLITE_DONE => EMPTY */
+        DBGTRC("DEBUG: Combination not found in db\n");
+        result = 0;
+    }
+
+    sqlite3_clear_bindings(st);
+    sqlite3_reset(st);
+
+    return result;
+}
+
+/**
+ * Look up the fileinfo row matching a combination of path and filename ids.
+ * @param pname_id id in table paths, must be > 0
+ * @param fname_id id in table filenames, must be > 0
+ * @return -2 in case of error
+ *         0 if the combination is not stored
+ *         otherwise column 0 (the id) of the first matching row
+ */
+int64_t dbi_select_fileinfo_by_path_filename_ids(int64_t pname_id, int64_t fname_id) {
+    int64_t result = 0;
+    int strc = 0;
+    sqlite3_stmt *st = select_fileinfo_by_path_filename_ids;
+
+    DBCONN_CHECK(-2);
+
+    /* Avoid conflict with earlier calls */
+    sqlite3_clear_bindings(st);
+    sqlite3_reset(st);
+
+    /* int64_t is not "long" on every platform, so the ids are printed via
+     * long long. */
+    if ( pname_id < 1 || fname_id < 1 ) {
+        LOGERR("ERROR: At least one invalid id: path %lld | filename %lld\n",
+            (long long)pname_id, (long long)fname_id);
+        return -2;
+    }
+
+    if (sqlite3_bind_int64(st, 1, pname_id) != SQLITE_OK) {
+        LOGERR("ERROR: Failed to bind pname_id %lld to prepared statement: %s\n", (long long)pname_id, sqlite3_errmsg(dbconn));
+        return -2;
+    }
+
+    if (sqlite3_bind_int64(st, 2, fname_id) != SQLITE_OK) {
+        LOGERR("ERROR: Failed to bind fname_id %lld to prepared statement: %s\n", (long long)fname_id, sqlite3_errmsg(dbconn));
+        return -2;
+    }
+
+    strc = sqlite3_step(st);
+    /* Don't forget: the sqlite3_reset() call must be executed! */
+    if (strc == SQLITE_ROW) {
+        result = (int64_t) sqlite3_column_int64(st, 0);
+    } else if (strc != SQLITE_DONE) {
+        LOGERR("ERROR: Failed step: %s\n", sqlite3_errmsg(dbconn));
+        result = -2;
+    } else { /* SQLITE_DONE => EMPTY */
+        DBGTRC("DEBUG: Combination not found in db\n");
+        result = 0;
+    }
+
+    sqlite3_clear_bindings(st);
+    sqlite3_reset(st);
+
+    return result;
+}
+
+/**
+ * Set the last_seen column of a fileinfo row to the current time.
+ * @param id primary key of the fileinfo row, must be > 0
+ * @return 0 if ok
+ *         -1 if the id is invalid
+ *         -2 if there is no database connection or a database call failed
+ */
+int dbi_update_fileinfo_last_seen(int64_t id) {
+    int rc = -1, strc = 0;
+    time_t ts = time(NULL);
+    sqlite3_stmt *st = update_fileinfo_last_seen;
+
+    DBCONN_CHECK( -2 );
+
+    if (id < 1) {
+        LOGERR("ERROR: Invalid id.\n");
+        return -1;
+    }
+
+    /* Avoid conflict with earlier calls */
+    sqlite3_clear_bindings(st);
+    sqlite3_reset(st);
+
+    /* Positions follow the order in the statement: 1 = @time, 2 = @id. */
+    if (sqlite3_bind_int64(st, 1, (sqlite3_int64)ts) != SQLITE_OK) {
+        LOGERR("ERROR: Failed to bind last_seen timestamp to prepared statement: %s\n", sqlite3_errmsg(dbconn));
+        return -2;
+    }
+    if (sqlite3_bind_int64(st, 2, id) != SQLITE_OK) {
+        LOGERR("ERROR: Failed to bind id %lld to prepared statement: %s\n", (long long)id, sqlite3_errmsg(dbconn));
+        return -2;
+    }
+
+    strc = sqlite3_step(st);
+    if (strc != SQLITE_DONE) {
+        /* int64_t is not "long" on every platform, so print it via long long. */
+        LOGERR("ERROR: Failed to update last_seen timestamp for entry %lld: %s\n", (long long)id, sqlite3_errmsg(dbconn));
+        rc = -2;
+    } else {
+        rc = 0;
+    }
+
+    sqlite3_clear_bindings(st);
+    sqlite3_reset(st);
+
+    return rc;
+}
+
+/**
+ * Rewrite an already existing fileinfo entry with the data found in fi.
+ *
+ * Ids handed over as values < 1 are resolved here: path and filename ids via
+ * their select functions, the hashes id by inserting the hashes and selecting
+ * them afterwards.
+ *
+ * NOTE(review): existing_id is only validated and logged, it is not bound to
+ * the prepared statement in this function. The SQL text of
+ * update_fileinfo_complete is defined outside of this block - confirm whether
+ * it expects the row id as an additional parameter.
+ *
+ * @param fi          file information, must not be NULL
+ * @param existing_id id of the entry to update, has to be >= 1
+ * @param pname_id    id of the path entry or < 1 to look it up
+ * @param fname_id    id of the filename entry or < 1 to look it up
+ * @param hashes_id   id of the hashes entry or < 1 to insert/look it up
+ * @return  0 on success
+ *         -2 on failure
+ */
+int update_fileinfo_function(struct df_fileinfo *fi, int64_t existing_id,
+        int64_t pname_id, int64_t fname_id, int64_t hashes_id) {
+    int rc = -1, strc = 0;
+    time_t ts = time(NULL);
+    sqlite3_stmt *st = update_fileinfo_complete;
+
+    DBCONN_CHECK( -2 );
+
+    if (fi == NULL) {
+        LOGERR("ERROR: Invalid argument.\n");
+        return -2;
+    }
+
+    /* An update without a target entry makes no sense */
+    if (existing_id < 1) {
+        LOGERR("ERROR: No entry given.\n");
+        return -2;
+    }
+
+    /* TODO: ALL possible situations need to be checked */
+    if (fname_id < 1) {
+        fname_id = dbi_select_filename_by_name(fi->name);
+    }
+
+    if (pname_id < 1) {
+        pname_id = dbi_select_path_by_pathname(fi->path);
+    }
+
+    /* Possibly new hashes so always INSERT and use the return which was given */
+    if (hashes_id < 1) {
+        if (dbi_insert_hashes(fi->hashes.blake2, fi->hashes.sha256, fi->hashes.sha512) < 0) {
+            LOGERR("ERROR: abort due to previous error.\n");
+            return -2;
+        }
+        hashes_id = dbi_select_hashes_by_strings(fi->hashes.blake2, fi->hashes.sha256, fi->hashes.sha512);
+    }
+
+    /* Never write references to entries which could not be resolved */
+    if (fname_id < 1 || pname_id < 1 || hashes_id < 1) {
+        LOGERR("ERROR: abort due to previous error.\n");
+        return -2;
+    }
+
+    sqlite3_clear_bindings(st);
+    sqlite3_reset(st);
+
+    if (sqlite3_bind_int64(st, 1, pname_id) != SQLITE_OK) {
+        LOGERR("ERROR: Failed to bind path_id to prepared statement: %s\n", sqlite3_errmsg(dbconn));
+        return -2;
+    }
+    if (sqlite3_bind_int64(st, 2, fname_id) != SQLITE_OK) {
+        LOGERR("ERROR: Failed to bind filename_id to prepared statement: %s\n", sqlite3_errmsg(dbconn));
+        return -2;
+    }
+    if (sqlite3_bind_int64(st, 3, hashes_id) != SQLITE_OK) {
+        LOGERR("ERROR: Failed to bind hashes_id to prepared statement: %s\n", sqlite3_errmsg(dbconn));
+        return -2;
+    }
+    if (sqlite3_bind_int64(st, 4, fi->statbuf.st_size) != SQLITE_OK) {
+        LOGERR("ERROR: Failed to bind size to prepared statement: %s\n", sqlite3_errmsg(dbconn));
+        return -2;
+    }
+    if (sqlite3_bind_int64(st, 5, ts) != SQLITE_OK) {
+        LOGERR("ERROR: Failed to bind last_seen timestamp to prepared statement: %s\n", sqlite3_errmsg(dbconn));
+        return -2;
+    }
+    /* SQLITE_TRANSIENT: sqlite takes its own copy of the stat structure */
+    if (sqlite3_bind_blob(st, 6, &(fi->statbuf), sizeof(struct stat), SQLITE_TRANSIENT) != SQLITE_OK) {
+        LOGERR("ERROR: Failed to bind stat data to prepared statement: %s\n", sqlite3_errmsg(dbconn));
+        return -2;
+    }
+
+    strc = sqlite3_step(st);
+    if (strc != SQLITE_DONE) {
+        LOGERR("ERROR: Failed to completely update entry %ld: %s\n", existing_id, sqlite3_errmsg(dbconn));
+        rc = -2;
+    } else {
+        rc = 0;
+    }
+
+    sqlite3_clear_bindings(st);
+    sqlite3_reset(st);
+
+    return rc;
+}
+
+/**
+ * Public entry point for a complete update of an existing fileinfo entry.
+ * No path, filename or hashes id is known here, so update_fileinfo_function()
+ * receives -1 for each of them.
+ *
+ * @param fi          file information to store
+ * @param existing_id id of the entry to update
+ * @return the return value of update_fileinfo_function()
+ */
+int dbi_update_fileinfo_complete(struct df_fileinfo *fi, int64_t existing_id) {
+    const int64_t unresolved = -1;
+    int rc;
+
+    rc = update_fileinfo_function(fi, existing_id, unresolved, unresolved, unresolved);
+    return rc;
+}
+
+/**
+ * Wrapper function around several other inserts.
+ *
+ * Filename, path and hashes are inserted first and their ids selected
+ * afterwards. Then one of three things happens:
+ *  - an entry with the same hashes, path and filename exists: only its
+ *    last_seen timestamp is refreshed,
+ *  - an entry with the same path and filename but other hashes exists: it is
+ *    updated completely,
+ *  - otherwise a new fileinfo row is inserted.
+ *
+ * @param fi file information, must not be NULL
+ * @return  0 on success
+ *         <0 on failure
+ */
+int dbi_insert_fileinfo(struct df_fileinfo *fi) {
+    int rc = 0, strc = 0;
+    int64_t fname_id, pname_id, hashes_id, existing_entry = 0;
+    time_t ts = 0;
+    sqlite3_stmt *st = insert_fileinfo;
+
+    DBCONN_CHECK(-2);
+
+    if (fi == NULL) {
+        LOGERR("ERROR: No fileinfo given.\n");
+        return -2;
+    }
+
+    if (dbi_insert_filename(fi->name) < 0) {
+        LOGERR("ERROR: abort due to previous error.\n");
+        return -2;
+    }
+    fname_id = dbi_select_filename_by_name(fi->name);
+
+    if (dbi_insert_pathname(fi->path) < 0) {
+        LOGERR("ERROR: abort due to previous error.\n");
+        return -2;
+    }
+    pname_id = dbi_select_path_by_pathname(fi->path);
+
+    /* TODO: Take some time and decide whether it shall stay like this or hand over the struct */
+    if (dbi_insert_hashes(fi->hashes.blake2, fi->hashes.sha256, fi->hashes.sha512) < 0) {
+        LOGERR("ERROR: abort due to previous error.\n");
+        return -2;
+    }
+    hashes_id = dbi_select_hashes_by_strings(fi->hashes.blake2, fi->hashes.sha256, fi->hashes.sha512);
+
+    /* Any problems with the selects? After the inserts every id has to be a real one. */
+    if (fname_id < 1 || pname_id < 1 || hashes_id < 1) {
+        LOGERR("ERROR: abort due to previous error.\n");
+        return -2;
+    }
+
+    ts = time(NULL);
+
+    /* Identical entry already known: refreshing last_seen is sufficient */
+    existing_entry = dbi_select_fileinfo_by_hash_path_filename_ids(hashes_id, pname_id, fname_id);
+    if (existing_entry < 0) {
+        /* A failed lookup must not end in a blind INSERT of a duplicate */
+        LOGERR("ERROR: abort due to previous error.\n");
+        return -2;
+    }
+    if (existing_entry > 0) {
+        return dbi_update_fileinfo_last_seen(existing_entry);
+    }
+
+    /* fullpath entry exists, but the hashes mismatch. */
+    existing_entry = dbi_select_fileinfo_by_path_filename_ids(pname_id, fname_id);
+    if (existing_entry < 0) {
+        LOGERR("ERROR: abort due to previous error.\n");
+        return -2;
+    }
+    if (existing_entry > 0) {
+        return update_fileinfo_function(fi, existing_entry, pname_id, fname_id, hashes_id);
+    }
+
+    sqlite3_clear_bindings(st);
+    sqlite3_reset(st);
+
+    if (sqlite3_bind_int64(st, 1, pname_id) != SQLITE_OK) {
+        LOGERR("ERROR: Failed to bind path_id to prepared statement: %s\n", sqlite3_errmsg(dbconn));
+        return -2;
+    }
+    if (sqlite3_bind_int64(st, 2, fname_id) != SQLITE_OK) {
+        LOGERR("ERROR: Failed to bind filename_id to prepared statement: %s\n", sqlite3_errmsg(dbconn));
+        return -2;
+    }
+    if (sqlite3_bind_int64(st, 3, hashes_id) != SQLITE_OK) {
+        LOGERR("ERROR: Failed to bind hashes_id to prepared statement: %s\n", sqlite3_errmsg(dbconn));
+        return -2;
+    }
+    if (sqlite3_bind_int64(st, 4, fi->statbuf.st_size) != SQLITE_OK) {
+        LOGERR("ERROR: Failed to bind size to prepared statement: %s\n", sqlite3_errmsg(dbconn));
+        return -2;
+    }
+    if (sqlite3_bind_int64(st, 5, ts) != SQLITE_OK) {
+        LOGERR("ERROR: Failed to bind last_seen timestamp to prepared statement: %s\n", sqlite3_errmsg(dbconn));
+        return -2;
+    }
+    /* SQLITE_TRANSIENT: sqlite takes its own copy of the stat structure */
+    if (sqlite3_bind_blob(st, 6, &(fi->statbuf), sizeof(struct stat), SQLITE_TRANSIENT) != SQLITE_OK) {
+        LOGERR("ERROR: Failed to bind stat data to prepared statement: %s\n", sqlite3_errmsg(dbconn));
+        return -2;
+    }
+
+    strc = sqlite3_step(st);
+    if (strc != SQLITE_DONE) {
+        LOGERR("ERROR: Failed to insert fileinfo for %s/%s into db: %s\n", fi->path, fi->name, sqlite3_errmsg(dbconn));
+        rc = -2;
+    } else {
+        rc = 0;
+    }
+
+    sqlite3_clear_bindings(st);
+    sqlite3_reset(st);
+
+    return rc;
+}
+
+#if 0
+ *select_fileinfo_by_id,
+ *select_fileinfo_by_id_resolved,
+ *select_fileinfo_by_path_id,
+ *select_fileinfo_by_filename_id,
+ *select_fileinfo_by_path_filename_ids,
+ *select_fileinfo_by_hash_id;
+
+
+sqlite3_stmt *,
+
+sqlite3_stmt *delete_fileinfo_by_id;
+#endif
+
diff --git a/src/directory_scanner.c b/src/directory_scanner.c
new file mode 100644
index 0000000..27627d5
--- /dev/null
+++ b/src/directory_scanner.c
@@ -0,0 +1,211 @@
+/* SPDX-License-Identifier: Apache-2.0 */
+
+/**
+ * Copyright 2026 Thorsten Töpper
+ *
+ * @file directory_scanner.c
+ *
+ * vim:ts=4:sw=4:expandtab
+ */
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <unistd.h>
+#include <stdbool.h>
+#include <stdint.h>
+#include <errno.h>
+#include <sys/stat.h>
+#include <dirent.h>
+
+#include "options.h"
+#include "directory_scanner.h"
+#include "kv_manager.h"
+#include "trace_macros.h"
+
+
+/*=========== DEFINES, CONSTANTS AND TYPES ===========*/
+
+struct path_stack { /* node of a singly linked LIFO list holding directory paths */
+ struct path_stack *next; /* node below this one, NULL for the bottom node */
+ char *fullpath; /* path string; pstack_pop() returns it and frees only the node */
+};
+
+/*=========== GLOBAL VARIABLES ===========*/
+struct path_stack *pstack = NULL; /* top of the stack, NULL while the stack is empty */
+
+
+/*=========== FUNCTIONS ===========*/
+
+int pstack_push(char *s); /* put s on top of pstack: 0 on success, -1 on failure */
+char *pstack_pop(); /* take the top path from pstack, NULL if the stack is empty */
+
+
+/**
+ * Put a path on top of the global path stack. Only the pointer is stored,
+ * the string itself is not copied.
+ *
+ * @param s path string, must not be NULL or empty
+ * @return  0 on success
+ *         -1 on an empty path or a failed allocation
+ */
+inline int pstack_push(char *s) {
+    struct path_stack *entry = NULL;
+
+    if (s == NULL || *s == '\0') {
+        LOGERR("ERROR: Empty path\n");
+        return -1;
+    }
+
+    DBGTRC("DEBUG: push %s to pstack\n", s);
+
+    entry = calloc(1, sizeof(struct path_stack));
+    if (entry == NULL) {
+        LOGERR("ERROR: Failed to allocate 16bytes...\n");
+        return -1;
+    }
+
+    /* The new node becomes the head, the old head hangs below it */
+    entry->fullpath = s;
+    entry->next = pstack;
+    pstack = entry;
+
+    return 0;
+}
+
+
+/**
+ * Take the topmost path from the global path stack.
+ *
+ * @return the stored path pointer, the node itself is freed here
+ *         NULL if the stack is empty
+ */
+inline char *pstack_pop() {
+    struct path_stack *top = pstack;
+    char *path = NULL;
+
+    if (top == NULL) {
+        return NULL;
+    }
+
+    DBGTRC("DEBUG: pop %s from pstack\n", top->fullpath);
+
+    /* Unlink the head before its memory is released */
+    pstack = top->next;
+    path = top->fullpath;
+    free(top);
+
+    return path;
+}
+
+
+/**
+ * Traverse the directory tree from the given starting point and add everything to
+ * the gdbm storage.
+ *
+ * Every directory is handed to process_directory(), which pushes the
+ * subdirectories it finds onto the path stack. The loop runs until that stack
+ * is empty. Entries which are already on the stack when this function is
+ * called are processed as well.
+ *
+ * @param starting_point The directory where the travel begins. If NULL or empty the current
+ *                       path "." is used.
+ * @return 0 on success
+ *         <0 on failure
+ */
+int traverse_directory_tree(const char *starting_point) {
+    char *tmp;
+    size_t length = 0;
+    int rc = 0;
+
+    if (starting_point == NULL || starting_point[0] == '\0') {
+        LOGERR("WARNING: No starting point given, begin at \".\"\n");
+        starting_point = ".";
+    }
+
+    /* Work on a heap copy, so every path in the loop can be freed the same way */
+    length = strlen(starting_point);
+    if ((tmp = calloc(length+1, 1)) == NULL) {
+        LOGERR("ERROR: Failed to allocate memory for the starting point.\n");
+        return -1;
+    }
+    memcpy(tmp, starting_point, length);
+
+    /* In case there are still entries on an existing pstack, take them with you */
+    while (tmp != NULL) {
+        DBGTRC("DEBUG: process directory %s next\n", tmp);
+        rc = process_directory(tmp);
+        /* The path is not needed anymore, no matter how the processing ended */
+        free(tmp);
+        if (rc < 0) { return -1; }
+        tmp = pstack_pop();
+    }
+
+    return 0;
+}
+
+/**
+ * Read one directory. Every regular file and directory found in it is stored
+ * in the key:value storage with its full path as key, the processing state
+ * false and its type ('F' or 'D'). Directories are additionally pushed onto
+ * the path stack. At the end the entry of the directory itself is flagged as
+ * processed.
+ *
+ * "." and ".." are never followed. All other names starting with a '.' are
+ * skipped unless option_show_hidden_entries is set.
+ *
+ * @param path the directory to read, at most 4096 bytes long
+ *
+ * @return 0 on success
+ *         -1 on failure
+ */
+int process_directory(char *path) {
+    char *fullpath = NULL, *fname_in_path = NULL, *stack_entry = NULL;
+    char type = 0;
+    size_t path_length = 0, fullpath_size = 0, name_space = 0, entry_length = 0;
+    int written = 0;
+    DIR *dir;
+    struct dirent *de = NULL;
+    struct stat stat_res;
+
+    /* February 2026: Modern filesystems usually have their maximal fullpath length
+     * at 4096 bytes. At least to my knowledge, so if this changes the checks need
+     * to be corrected.
+     */
+    if (path == NULL || ((path_length = strnlen(path,4100)) == 0)) {
+        LOGERR("ERROR: No path given.\n");
+        return -1;
+    }
+
+    if (path_length > 4096) {
+        LOGERR("ERROR: path longer than 4096 byte.\n");
+        return -1;
+    }
+
+    /* 256 byte max filename + path separator + \0 */
+    fullpath_size = path_length + 258;
+    if ((fullpath = calloc(fullpath_size, sizeof(char))) == NULL) {
+        LOGERR("ERROR: Failed to allocate memory for fullpath.\n");
+        return -1;
+    }
+
+    /* fullpath starts with "<path><separator>", entry names are written behind it */
+    memcpy(fullpath, path, path_length);
+    if (path[path_length-1] == PATH_SEP) {
+        fname_in_path = &(fullpath[path_length]);
+    } else {
+        fullpath[path_length] = PATH_SEP;
+        fname_in_path = &(fullpath[path_length+1]);
+    }
+    name_space = fullpath_size - (size_t)(fname_in_path - fullpath);
+
+    if ((dir = opendir(path)) == NULL) {
+        LOGERR("ERROR: Failed to open directory '%s': %s (errno %d)\n",
+                path, strerror(errno), errno);
+        free(fullpath);
+        return -1;
+    }
+
+    while ((de = readdir(dir)) != NULL) {
+        /* Following "." or ".." would process this directory or its parent
+         * over and over again, so they are skipped independent of any option. */
+        if (strcmp(de->d_name, ".") == 0 || strcmp(de->d_name, "..") == 0)
+            continue;
+
+        if (de->d_name[0] == '.' && option_show_hidden_entries == false)
+            continue;
+
+        /* Bounded write: a name which does not fit is skipped and not truncated */
+        written = snprintf(fname_in_path, name_space, "%s", de->d_name);
+        if (written < 0 || (size_t)written >= name_space) {
+            LOGERR("ERROR: Name of entry '%s' is too long, skipping it.\n", de->d_name);
+            continue;
+        }
+        DBGTRC("DEBUG: fullpath: '%s'\n", fullpath);
+        if (lstat(fullpath, &stat_res) != 0) {
+            LOGERR("ERROR: lstat call on '%s' failed: %s (errno %d)\n",
+                    fullpath, strerror(errno), errno);
+            continue;
+        }
+        if (S_ISDIR(stat_res.st_mode)) {
+            type = 'D';
+            entry_length = strlen(fullpath);
+            if ((stack_entry=calloc(1, entry_length+1)) == NULL) {
+                LOGERR("ERROR: memory allocation failed\n");
+                free(fullpath);
+                closedir(dir);
+                return -1;
+            }
+            memcpy(stack_entry, fullpath, entry_length);
+            /* The stack owns the copy only after a successful push */
+            if (pstack_push(stack_entry) < 0) {
+                free(stack_entry);
+                free(fullpath);
+                closedir(dir);
+                return -1;
+            }
+        } else if (S_ISREG(stat_res.st_mode)) {
+            type = 'F';
+        } else {
+            DBGTRC("DEBUG: Not a file or directory ... ignoring.\n");
+            continue;
+        }
+
+        /* Don't overwrite earlier runs */
+        if (!kv_entry_exists(fullpath)) {
+            /* Ignore errors, missing entries shall show up in the error log
+             * and require manual intervention */
+            kv_add_bool_type(fullpath, false, type);
+        }
+    }
+    free(fullpath);
+    closedir(dir);
+
+    /* Flag the directory itself as processed */
+    if (!kv_entry_exists(path)) {
+        if (path[0] == '.' && option_show_hidden_entries == false) {
+            return 0;
+        }
+        kv_add_bool_type(path, true, 'D');
+    } else {
+        kv_set_bool(path, true);
+    }
+
+    return 0;
+}
+
+
diff --git a/src/duplicate_finder.c b/src/duplicate_finder.c
new file mode 100644
index 0000000..bd679e9
--- /dev/null
+++ b/src/duplicate_finder.c
@@ -0,0 +1,49 @@
+/* SPDX-License-Identifier: Apache-2.0 */
+
+/**
+ * Copyright 2026 Thorsten Töpper
+ *
+ * Keep track of files across different paths or filesystems with a sqlite db
+ * and identify duplicates. This utility should be used for housekeeping when
+ * spreading data across multiple FS / integrate old disks into newer setups.
+ *
+ * The DB stores BLAKE2, SHA256 and SHA512 hashes calculated with the OpenSSL library,
+ * path and filenames and their corresponding stat() FS data.
+ *
+ * @file duplicate_finder.c
+ *
+ * vim:ts=4:sw=4:expandtab
+ */
+
+#include <stdlib.h>
+
+#include "trace_macros.h"
+#include "options.h"
+#include "kv_manager.h"
+#include "directory_scanner.h"
+
+
+/*=========== DEFINES, CONSTANTS AND TYPES ===========*/
+
+/*=========== GLOBAL VARIABLES ===========*/
+
+/*=========== FUNCTIONS ===========*/
+
+/**
+ * Program entry: parse the options, open the key:value storage, read the
+ * directory given as first non-option argument (or "." if there is none),
+ * dump the storage to stdout and close it again.
+ *
+ * @return EXIT_SUCCESS if the directory could be processed, else EXIT_FAILURE
+ */
+int main(int argc, char **argv) {
+    int path_index = 1;
+    int rc = 0;
+
+    if (argc > 1) {
+        path_index = parse_arguments(argc, argv);
+    }
+
+    /* TODO: as option */
+    kv_open_storage("/tmp/duplicate_finder.gdbm");
+
+    /* argv[argc] is NULL, so a path is only available while path_index < argc */
+    rc = process_directory((path_index < argc) ? argv[path_index] : ".");
+
+    kv_dump(stdout);
+
+    kv_close_storage();
+
+    return (rc < 0) ? EXIT_FAILURE : EXIT_SUCCESS;
+}
+
diff --git a/src/file_processor.c b/src/file_processor.c
index 1cfed46..f4f9d05 100644
--- a/src/file_processor.c
+++ b/src/file_processor.c
@@ -21,7 +21,9 @@
#include "file_processor.h"
#include "trace_macros.h"
-
+#include "hex_conversion.h"
+#include "kv_manager.h"
+#include "database_interaction.h"
/*=========== DEFINES, CONSTANTS AND TYPES ===========*/
@@ -214,7 +216,7 @@ int process_file(struct df_fileinfo *info) {
LOGERR("ERROR: Non-regular files are not processed.\n");
return -1;
}
-
+
if ((ctx_pkg = init_md_components()) == NULL) {
LOGERR("ERROR: Failed to initialize/create md contexts to be used with %s\n",
fullpath);
@@ -270,23 +272,121 @@ int process_file(struct df_fileinfo *info) {
destroy_md_components(ctx_pkg);
return -1;
}
- memcpy(info->blake2, md_val, md_len);
+ convert_from_binary(md_val, md_len, info->hashes.blake2);
if (EVP_DigestFinal_ex(ctx_pkg->mdctx_sha256, md_val, &md_len) != 1) {
LOGERR("ERROR: Failed to finalize MD SHA256 of file '%s'\n", fullpath);
destroy_md_components(ctx_pkg);
return -1;
}
- memcpy(info->sha256, md_val, md_len);
+ convert_from_binary(md_val, md_len, info->hashes.sha256);
if (EVP_DigestFinal_ex(ctx_pkg->mdctx_sha512, md_val, &md_len) != 1) {
LOGERR("ERROR: Failed to finalize MD SHA512 of file '%s'\n", fullpath);
destroy_md_components(ctx_pkg);
return -1;
}
- memcpy(info->sha512, md_val, md_len);
+ convert_from_binary(md_val, md_len, info->hashes.sha512);
return 0;
}
+/**
+ * Return a file info struct with path and filename fields filled.
+ *
+ * The key is split at its last '/': everything before it becomes the path,
+ * everything behind it the filename. Both are stored as separately allocated
+ * copies, so the caller has to free info->path, info->name and the struct.
+ *
+ * @param key the fullpath used as key in the gdbm.
+ * @return NULL on failure (empty key, no '/' in the key or out of memory)
+ */
+struct df_fileinfo *prepare_fileinfo(char *key) {
+    char *tmp;
+    char *fname = NULL;
+    size_t plen=0, flen=0;
+    struct df_fileinfo *info = NULL;
+
+    if (key == NULL || key[0] == '\0') {
+        return NULL;
+    }
+    if ((fname=strrchr(key, '/')) == NULL) {
+        LOGERR("ERROR: path<->filename separation failed with '%s'\n", key);
+        return NULL;
+    }
+
+    /* At this point the address of fname will always be equal or larger than keys */
+    plen = (size_t) (fname - key);
+    fname++; /* drop the / */
+    flen = strlen(fname);
+
+    if ((info=calloc(1, sizeof(struct df_fileinfo))) == NULL) {
+        return NULL;
+    }
+
+    if ((tmp = calloc(plen+1, sizeof(char))) == NULL) {
+        free(info);
+        return NULL;
+    }
+    memcpy(tmp, key, plen);
+    info->path = tmp;
+
+    if ((tmp = calloc(flen+1, sizeof(char))) == NULL) {
+        free(info->path);
+        free(info);
+        return NULL;
+    }
+    /* Copy exactly the filename: its own length counts, not the one of the path */
+    memcpy(tmp, fname, flen);
+    info->name = tmp;
+
+    return info;
+}
+
+/**
+ * Iterate over the whole gdbm content. If an entry is an unprocessed file,
+ * process it, place the information in the database and set it as processed
+ * in the storage.
+ *
+ * NOTE(review): kv_set_bool() stores into the storage while the key
+ * iteration is still running - confirm against the gdbm documentation that
+ * no key can be skipped or visited twice because of this.
+ *
+ * @return 0 on success
+ *         <0 on failure
+ */
+int process_gdbm_content() {
+    char *key, *tmpkey;
+    struct df_fileinfo *info;
+    int dbrc = 0;
+
+    key = kv_first_key();
+    while (key != NULL) {
+        /* file? already processed? */
+        if (kv_get_type(key) == 'D' || kv_get_bool(key)) {
+            tmpkey = key;
+            key = kv_next_key(tmpkey);
+            free(tmpkey);
+            continue;
+        }
+
+        info = prepare_fileinfo(key);
+        if (info == NULL) {
+            LOGERR("ERROR: Preparing struct for key %s failed.\n", key);
+            free(key);
+            return -1;
+        }
+        if (process_file(info) < 0) {
+            free(info->path);
+            free(info->name);
+            free(info);
+            free(key);
+            return -1;
+        }
+
+        dbrc = dbi_insert_fileinfo(info);
+        free(info->path);
+        free(info->name);
+        free(info);
+        if (dbrc < 0) {
+            LOGERR("ERROR: Aborting after database error.\n");
+            free(key);
+            return -1;
+        }
+
+        kv_set_bool(key, true);
+
+        /* The current key is still needed to request its successor */
+        tmpkey = key;
+        key = kv_next_key(tmpkey);
+        free(tmpkey);
+    }
+
+    return 0;
+}
diff --git a/src/kv_manager.c b/src/kv_manager.c
new file mode 100644
index 0000000..df95faf
--- /dev/null
+++ b/src/kv_manager.c
@@ -0,0 +1,347 @@
+/* SPDX-License-Identifier: Apache-2.0 */
+
+/**
+ * Copyright 2026 Thorsten Töpper
+ *
+ * For now working with gdbm, which is to my knowledge ported to every
+ * important OS (GNU, BSD, UNIX (AIX, MacOS), Windows) and probably also
+ * many of the less spread ones.
+ *
+ * This key value storage contains the processable filesystem entries,
+ * meaning files and directories and whether they have been processed.
+ *
+ * The key is the path and fsentry name.
+ *
+ * The value is a two byte char array: the first byte represents true or
+ * false via 'T' or 'F', the second byte the type, which is directory 'D',
+ * file 'F' or '\0' if unknown.
+ *
+ * The unknown type should never show up.
+ *
+ * @file kv_manager.c
+ *
+ * vim:ts=4:sw=4:expandtab
+ */
+#include <stdlib.h>
+#include <string.h>
+#include <stdbool.h>
+#include <errno.h>
+
+#include <gdbm.h>
+
+#include "kv_manager.h"
+#include "trace_macros.h"
+
+
+/*=========== DEFINES, CONSTANTS AND TYPES ===========*/
+
+/*=========== GLOBAL VARIABLES ===========*/
+GDBM_FILE gdbf; /* handle of the open gdbm storage; set by kv_open_storage(), reset to NULL by a successful kv_close_storage() */
+
+/*=========== FUNCTIONS ===========*/
+bool add_b_t_wrapped(char *key, bool value, char type, bool keep_original_type); /* shared backend of kv_add_bool_type() and kv_set_bool() */
+
+/**
+ * Open the key value storage used by the manager
+ *
+ * The file is opened for reading and writing and is created if it does not
+ * exist yet.
+ *
+ * @param fname filename of the storage, for in-memory place it on a tmpfs
+ *
+ * @return 0 on success
+ *         -1 on wrong filename
+ *         -2 on already open gdbm file
+ *         -3 on failure when opening or creating the db file
+ */
+int kv_open_storage(char *fname) {
+    if (fname == NULL || fname[0] == '\0') {
+        LOGERR("ERROR: No valid filename\n");
+        return -1;
+    }
+
+    if (gdbf != NULL) {
+        LOGERR("ERROR: Already a gdbm opened not switching\n");
+        return -2;
+    }
+
+    /* Currently CLOEXEC is obsolete, as no exec calls are planned */
+    gdbf = gdbm_open(fname, 0, GDBM_WRCREAT | GDBM_CLOEXEC | GDBM_XVERIFY, 0644, NULL);
+    if (gdbf == NULL) {
+        LOGERR("ERROR: Failed to open gdbm db: %s\n",
+                gdbm_strerror(gdbm_errno));
+        return -3;
+    }
+    return 0;
+}
+
+/**
+ * Close the currently open storage
+ *
+ * The global handle is reset in every case, so it is never used again after
+ * gdbm_close() was called on it.
+ *
+ * @return true on success or if no storage was open
+ *         false if gdbm_close() reported an error
+ */
+bool kv_close_storage() {
+    int close_rc;
+
+    if (gdbf == NULL) {
+        DBGTRC("DEBUG: No gdbm file open\n");
+        return true;
+    }
+
+    close_rc = gdbm_close(gdbf);
+    /* gdbm_close() releases the handle even when it reports an error */
+    gdbf = NULL;
+
+    if (close_rc != 0) {
+        /* Both gdbm_errno and errno are set, communicate the generic one */
+        LOGERR("ERROR: Failed to close gdbm file correctly: %s (errno: %d)\n",
+                gdbm_strerror(gdbm_errno), errno);
+        return false;
+    }
+    return true;
+}
+
+/**
+ * Local function to be wrapped by inserting/modifying ones.
+ *
+ * The key is stored including its terminating '\0'. An already stored value
+ * is fetched, modified and written back, a missing one is created.
+ *
+ * @param key keystring
+ * @param state the bool part of the value
+ * @param type the fs entry type of the value
+ * @param keep_original_type bool whether the type parameter should be set or not.
+ *                           If true the stored type stays as it is, for a
+ *                           new entry it stays '\0'.
+ *
+ * @return true on success
+ *         false on failure
+ */
+bool add_b_t_wrapped(char *key, bool state, char type, bool keep_original_type) {
+    datum k, v;
+    int fcall_rc;
+    char s[2] = { 0, 0 };
+
+    if (key == NULL || key[0] == '\0') {
+        LOGERR("ERROR: No key given.\n");
+        return false;
+    }
+
+    if (gdbf == NULL) {
+        LOGERR("ERROR: No gdbm file open.\n");
+        return false;
+    }
+
+    k.dptr = key;
+    k.dsize = (int)strlen(key)+1;
+
+    v = gdbm_fetch(gdbf, k);
+
+    if ((v.dptr == NULL) && (gdbm_errno != GDBM_ITEM_NOT_FOUND)) {
+        LOGERR("ERROR: Failed to check key existence for key '%s': %s\n",
+                key, gdbm_strerror(gdbm_errno));
+        return false;
+    }
+
+    /* A stored value shorter than two bytes can't take state and type,
+     * replace it with a fresh one instead of writing behind its end. */
+    if ((v.dptr != NULL) && (v.dsize < 2)) {
+        free(v.dptr);
+        v.dptr = NULL;
+    }
+
+    if (v.dptr == NULL) {
+        v.dptr = s;
+        v.dsize = 2;
+    }
+
+    v.dptr[0] = (state) ? 'T' : 'F';
+    if (!keep_original_type) {
+        v.dptr[1] = type;
+    }
+
+    fcall_rc = gdbm_store(gdbf, k, v, GDBM_REPLACE);
+
+    /* Only a fetched value lives on the heap */
+    if (v.dptr != s)
+        free(v.dptr);
+
+    if (fcall_rc < 0) {
+        /* gdbm_errno is a gdbm error code, strerror() can't translate it */
+        LOGERR("ERROR: Failed store value for key '%s': %s\n",
+                key, gdbm_strerror(gdbm_errno));
+        return false;
+    }
+
+    return true;
+}
+
+
+/**
+ * Store state and type for the given key. The type of an already existing
+ * entry is overwritten.
+ *
+ * @param key   key to store
+ * @param state boolean
+ * @param type  additional single byte information
+ *
+ * @return true on success
+ *         false on failure
+ */
+bool kv_add_bool_type(char *key, bool state, char type) {
+    const bool keep_stored_type = false;
+    bool stored;
+
+    stored = add_b_t_wrapped(key, state, type, keep_stored_type);
+    return stored;
+}
+
+/**
+ * Store the given boolean value for the key. The value of an existing key is
+ * changed and its type kept, a missing key is inserted.
+ *
+ * @param key key to set
+ * @param value value to set
+ *
+ * @return true on success
+ *         false on failure
+ */
+bool kv_set_bool(char *key, bool value) {
+    const char unused_type = 0; /* ignored, as the stored type is kept */
+    const bool keep_stored_type = true;
+    bool stored;
+
+    stored = add_b_t_wrapped(key, value, unused_type, keep_stored_type);
+    return stored;
+}
+
+/**
+ * Read the bool part of the value stored for a key.
+ * CAUTION: First check separately whether key exists! A missing value can't
+ * be told apart from a stored false.
+ *
+ * @param key the key string
+ * @return true if the first byte of the value is 'T'
+ *         false in every other case
+ */
+bool kv_get_bool(char *key) {
+    bool state = false;
+    char *value = kv_get_raw(key);
+
+    if (value != NULL) {
+        state = (value[0] == 'T');
+        free(value);
+    }
+
+    return state;
+}
+
+/**
+ * Read the type part of the value stored for a key.
+ * CAUTION: First check separately whether key exists!
+ *
+ * @param key the key string
+ * @return the second byte of the stored value
+ *         0 if no value could be fetched
+ */
+char kv_get_type(char *key) {
+    char type = 0;
+    char *value = kv_get_raw(key);
+
+    if (value != NULL) {
+        type = value[1];
+        free(value);
+    }
+
+    return type;
+}
+
+
+/**
+ * Get the raw value. CAUTION: First check separately whether key exists!
+ *
+ * The key size includes the terminating '\0', exactly as add_b_t_wrapped()
+ * stores the keys. Otherwise no stored entry would ever be found.
+ *
+ * @param key the key string
+ * @return NULL in case of error or no value stored
+ *         pointer to a short array which needs to be freed after processment
+ */
+char *kv_get_raw(char *key) {
+    datum k, v;
+
+    if (key == NULL || key[0] == '\0') {
+        LOGERR("ERROR: No key given.\n");
+        return NULL;
+    }
+
+    if (gdbf == NULL) {
+        LOGERR("ERROR: No gdbm file open.\n");
+        return NULL;
+    }
+
+    k.dptr = key;
+    k.dsize = (int)strlen(key)+1;
+
+    v = gdbm_fetch(gdbf, k);
+
+    /* Only a missing value with another reason than "not found" is an error */
+    if ((v.dptr == NULL) && (gdbm_errno != GDBM_ITEM_NOT_FOUND)) {
+        LOGERR("ERROR: Failed to fetch value for key '%s': %s\n",
+                key, gdbm_strerror(gdbm_errno));
+    }
+
+    return v.dptr;
+}
+
+
+/**
+ * Simple check if there's data for the key
+ *
+ * The key size includes the terminating '\0', exactly as add_b_t_wrapped()
+ * stores the keys. Otherwise no stored entry would ever be found.
+ *
+ * @param key the keystring
+ * @return bool whether entry exists, false also in case of an error
+ */
+bool kv_entry_exists(char *key) {
+    datum k;
+
+    if (key == NULL || key[0] == '\0') {
+        LOGERR("ERROR: No key given.\n");
+        return false;
+    }
+
+    if (gdbf == NULL) {
+        LOGERR("ERROR: No gdbm file open.\n");
+        return false;
+    }
+
+    k.dptr = key;
+    k.dsize = (int)strlen(key)+1;
+
+    if (gdbm_exists(gdbf, k) == 1) {
+        return true;
+    }
+    if (gdbm_errno != GDBM_NO_ERROR) {
+        LOGERR("ERROR: Failed to verify existence of entry for key '%s': %s\n",
+                key, gdbm_strerror(gdbm_errno));
+    }
+    return false;
+}
+
+/**
+ * Wrapper to get the first key from the storage
+ * @return pointer to a key, needs to be freed manually
+ *         NULL in case of error, an empty storage or if no storage is open
+ */
+char *kv_first_key() {
+    datum k;
+
+    if (gdbf == NULL) {
+        LOGERR("ERROR: No gdbm file open.\n");
+        return NULL;
+    }
+
+    k = gdbm_firstkey(gdbf);
+    /* An empty storage is no error, everything else is */
+    if ((k.dptr == NULL) && (gdbm_errno != GDBM_ITEM_NOT_FOUND)) {
+        LOGERR("ERROR: Request for first key failed: %s\n",
+                gdbm_strerror(gdbm_errno));
+    }
+    return k.dptr;
+}
+
+/**
+ * Wrapper to get the next key stored
+ *
+ * The size of the given key includes the terminating '\0', exactly as
+ * add_b_t_wrapped() stores the keys. Otherwise the key would not be found in
+ * the storage and the iteration would end early.
+ *
+ * @param key the key whose successor is requested
+ * @return pointer to the key, needs to be freed manually
+ *         NULL if the end has been reached or an error occured
+ */
+char *kv_next_key(char *key) {
+    datum k, nk;
+
+    if (key == NULL || key[0] == '\0') {
+        LOGERR("ERROR: No valid key given\n");
+        return NULL;
+    }
+
+    if (gdbf == NULL) {
+        LOGERR("ERROR: No gdbm file open.\n");
+        return NULL;
+    }
+
+    k.dptr = key;
+    k.dsize = (int)strlen(key)+1;
+
+    nk = gdbm_nextkey(gdbf, k);
+
+    if ((nk.dptr == NULL) && (gdbm_errno != GDBM_ITEM_NOT_FOUND)) {
+        LOGERR("ERROR: Request for next key after '%s' failed: %s\n",
+                key, gdbm_strerror(gdbm_errno));
+    }
+
+    return nk.dptr;
+}
+
+/**
+ * Dump the storage data to the given output stream in incomplete JSON format
+ *
+ * One line is written per key. Keys are printed with their stored size as
+ * upper limit. A value byte which is not stored is shown as '?'.
+ *
+ * @param out the file stream, if NULL stdout is used
+ */
+void kv_dump(FILE *out) {
+    datum k, nk, v;
+    char state, type;
+
+    if (out == NULL) {
+        out = stdout;
+    }
+
+    if (gdbf == NULL) {
+        LOGERR("ERROR: No gdbm file open.\n");
+        return;
+    }
+
+    k = gdbm_firstkey(gdbf);
+    while (k.dptr != NULL) {
+        v = gdbm_fetch(gdbf, k);
+
+        if (v.dptr != NULL) {
+            /* Never read behind the end of a value which is too short */
+            state = (v.dsize > 0) ? v.dptr[0] : '?';
+            type = (v.dsize > 1) ? v.dptr[1] : '?';
+            fprintf(out, "{k='%.*s', v='%c%c'},\n", k.dsize, k.dptr, state, type);
+            free(v.dptr);
+        } else {
+            if (gdbm_errno == GDBM_ITEM_NOT_FOUND) {
+                fprintf(out, "{k='%.*s', v=NULL},\n", k.dsize, k.dptr);
+            } else {
+                LOGERR("ERROR: Failed to get value for '%.*s': %s\n",
+                        k.dsize, k.dptr, gdbm_strerror(gdbm_errno));
+            }
+        }
+
+        /* The current key is needed to find its successor, free it afterwards */
+        nk = gdbm_nextkey(gdbf, k);
+        free (k.dptr);
+        k = nk;
+    }
+}
diff --git a/src/options.c b/src/options.c
new file mode 100644
index 0000000..214a72c
--- /dev/null
+++ b/src/options.c
@@ -0,0 +1,118 @@
+/* SPDX-License-Identifier: Apache-2.0 */
+
+/* Copyright 2026 Thorsten Töpper
+ *
+ * vim:ts=4:sw=4:expandtab
+ */
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <getopt.h>
+#include <stdbool.h>
+#include <ctype.h>
+
+#include "options.h"
+#include "trace_macros.h"
+
+
+/* === GLOBAL VARIABLES === */
+struct option long_options[] = { /* table for getopt_long(): flag and val are 0 everywhere, so long options are reported as 0 and identified by name */
+ { "help", no_argument, 0, 0 },
+ { "quiet", no_argument, 0, 0 },
+ { "show-hidden-entries", no_argument, 0, 0 },
+ { 0, 0, 0, 0 } /* end marker */
+};
+
+bool option_quiet = false; /* set by --quiet or -q */
+bool option_show_hidden_entries = false; /* set by --show-hidden-entries or -s */
+
+char *exec_name; /* argv[0], stored by parse_arguments() for the usage output */
+
+/* === IMPLEMENTATION === */
+
+/**
+ * Print the call syntax and the list of options to stderr.
+ *
+ * @param executable name of the program as shown in the call line
+ */
+void usage(char *executable) {
+    /* long name, short name, optional argument, explanation */
+    static const char *const rows[][4] = {
+        { "--help", "-h", "", "Show this message and exit" },
+        { "--quiet", "-q", "", "Don't print error messages or warnings" },
+        { "--show-hidden-entries", "-a", "", "Show hidden entries in the directory" },
+    };
+    size_t i;
+
+    fprintf(stderr, "Call: %s OPTIONS path_to_open\n", executable);
+    fprintf(stderr, "\nOPTIONS are\n");
+
+    for (i = 0; i < sizeof(rows) / sizeof(rows[0]); i++) {
+        fprintf(stderr, " %-25s %2s %10s - %s\n",
+                rows[i][0], rows[i][1], rows[i][2], rows[i][3]);
+    }
+}
+
+
+/**
+ * Apply a single long option.
+ *
+ * "help" prints the usage and ends the program successfully, "quiet" and
+ * "show-hidden-entries" set their global flags. Any other name is reported
+ * as an error. If it additionally comes without an argument the program is
+ * ended with EXIT_FAILURE.
+ *
+ * @param option_name     name of the long option without leading dashes,
+ *                        NULL is ignored
+ * @param option_argument argument of the option, NULL if there is none
+ */
+void set_option(const char *option_name, char *option_argument) {
+
+    if (option_name == NULL)
+        return;
+
+    /* Options without argument arrive with a NULL pointer, which must not
+     * be handed to a %s conversion. */
+    DBGTRC("DEBUG: called with option_name '%s' and option_argument '%s'\n",
+            option_name, (option_argument != NULL) ? option_argument : "(null)");
+
+    /* options WITHOUT arguments */
+    if (strcmp("help", option_name) == 0) {
+        usage(exec_name);
+        exit(EXIT_SUCCESS);
+    }
+
+    if (strcmp("quiet", option_name) == 0) {
+        option_quiet = true;
+        return;
+    }
+
+    if (strcmp("show-hidden-entries", option_name) == 0) {
+        option_show_hidden_entries = true;
+        return;
+    }
+
+    /* options WITH arguments */
+    if (option_argument == NULL || option_argument[0] == '\0') {
+        LOGERR("ERROR: option_name %s with missing option_argument\n",
+                option_name);
+        exit(EXIT_FAILURE);
+    }
+
+    LOGERR("ERROR: Option '%s' not recognized.\n", option_name);
+}
+
+
+/**
+ * Parse the command line options with getopt_long().
+ *
+ * Short options: -h (help), -q (quiet) and -a or -s (show hidden entries).
+ * -a is the letter shown by usage(), -s stays accepted for existing callers.
+ * Long options are handed to set_option() by name.
+ *
+ * @param argc argument count as given to main()
+ * @param argv argument vector as given to main()
+ * @return index of the first argv element which is not an option (optind)
+ */
+int parse_arguments(int argc, char **argv) {
+    int c = 0, index;
+    /* exec_name is a file internal global variable for --help in set_option() */
+    exec_name = argv[0];
+
+    while(1) {
+        index = 0;
+        c = getopt_long(argc, argv, "ahqs", long_options, &index);
+
+        if (c == -1) {
+            break;
+        }
+
+        switch (c) {
+            case 0:
+                set_option(long_options[index].name, optarg);
+                break;
+            case 'h':
+                usage(exec_name);
+                exit(EXIT_SUCCESS);
+            case 'q':
+                option_quiet = true;
+                break;
+            case 'a':
+            case 's':
+                option_show_hidden_entries = true;
+                break;
+            case '?':
+                /* getopt_long() already reported the unknown option, go on */
+                break;
+            default:
+                LOGERR("ERROR: unrecognized option 0x%02X '%c'\n", c, c);
+                usage(exec_name);
+                exit(EXIT_FAILURE);
+        }
+    }
+
+    return optind;
+}
+