aboutsummaryrefslogtreecommitdiff
path: root/src/kv_manager.c
diff options
context:
space:
mode:
authorThorsten Töpper <atsutane@freethoughts.de>2026-02-18 21:51:53 +0100
committerThorsten Töpper <atsutane@freethoughts.de>2026-02-18 21:51:53 +0100
commitd513977a3566b14d9357906615d045d71741537f (patch)
tree3e707d2de9da71d98650fa8bb1b92ed11ab724ba /src/kv_manager.c
parenteed2d1323441861f2d41f0ecc0a72fcc9190fa5f (diff)
downloadduplicate_finder-d513977a3566b14d9357906615d045d71741537f.tar.gz
duplicate_finder-d513977a3566b14d9357906615d045d71741537f.tar.bz2
squash initial implementation
Diffstat (limited to 'src/kv_manager.c')
-rw-r--r--src/kv_manager.c347
1 files changed, 347 insertions, 0 deletions
diff --git a/src/kv_manager.c b/src/kv_manager.c
new file mode 100644
index 0000000..df95faf
--- /dev/null
+++ b/src/kv_manager.c
@@ -0,0 +1,347 @@
+/* SPDX-License-Identifier: Apache-2.0 */
+
+/**
+ * Copyright 2026 Thorsten Töpper
+ *
+ * For now working with gdbm, which is to my knowledge ported to every
+ * important OS (GNU, BSD, UNIX (AIX, MacOS), Windows) and probably also
+ * many of the less spread ones.
+ *
+ * This key value storage contains the processable filesystem entries,
+ * meaning files and directories and whether they have been processed.
+ *
+ * The key is the path and fsentry name.
+ *
+ * The value is a two byte char array: the first byte represents true or
+ * false via 'T' or 'F', the second the type: directory 'D', file 'F' or
+ * '\0' if unknown.
+ *
+ * The unknown type should never show up.
+ *
+ * @file kv_manager.c
+ *
+ * vim:ts=4:sw=4:expandtab
+ */
+#include <stdlib.h>
+#include <string.h>
+#include <stdbool.h>
+#include <errno.h>
+
+#include <gdbm.h>
+
+#include "kv_manager.h"
+#include "trace_macros.h"
+
+
+/*=========== DEFINES, CONSTANTS AND TYPES ===========*/
+
+/*=========== GLOBAL VARIABLES ===========*/
+GDBM_FILE gdbf; /* Handle of the single gdbm database this module works on;
+                 * NULL while no storage is open. */
+
+/*=========== FUNCTIONS ===========*/
+bool add_b_t_wrapped(char *key, bool value, char type, bool keep_original_type); /* Forward
+                 * declaration of the shared store helper used by the
+                 * inserting/modifying functions; defined further down. */
+
+/**
+ * Open the key value storage used by the manager.
+ *
+ * Only one storage can be open at a time: a call while a storage is open is
+ * rejected and leaves the open one untouched.
+ *
+ * NOTE(review): the return type is int, but the implementation reports a
+ * plain success flag and does not distinguish the failure causes. The flag
+ * semantics are kept so that existing callers keep working; switching to
+ * distinct error codes needs a coordinated change of all callers.
+ *
+ * @param fname filename of the storage, for in-memory place it on a tmpfs
+ *
+ * @return 1 on success
+ *         0 if fname is NULL or empty, a storage is already open, or
+ *           opening/creating the db file failed (the cause is logged)
+ */
+int kv_open_storage(char *fname) {
+    if (fname == NULL || fname[0] == '\0') {
+        LOGERR("ERROR: No valid filename\n");
+        return 0;
+    }
+
+    if (gdbf != NULL) {
+        LOGERR("ERROR: Already a gdbm opened not switching\n");
+        return 0;
+    }
+
+    /* Currently CLOEXEC is obsolete, as no exec calls are planned */
+    gdbf = gdbm_open(fname, 0, GDBM_WRCREAT | GDBM_CLOEXEC | GDBM_XVERIFY, 0644, NULL);
+    if (gdbf == NULL) {
+        LOGERR("ERROR: Failed to open gdbm db: %s\n",
+               gdbm_strerror(gdbm_errno));
+        return 0;
+    }
+
+    return 1;
+}
+
+/**
+ * Close the currently open storage.
+ *
+ * The global handle is reset even when closing reports an error, because
+ * gdbm_close() releases the handle in either case; keeping the pointer would
+ * leave a dangling handle behind and block any later kv_open_storage() call.
+ *
+ * @return true on success or if no storage was open
+ *         false if gdbm reported an error while closing
+ */
+bool kv_close_storage() {
+    int close_rc;
+    int saved_errno;
+
+    if (gdbf == NULL) {
+        DBGTRC("DEBUG: No gdbm file open\n");
+        return true;
+    }
+
+    close_rc = gdbm_close(gdbf);
+    /* Capture errno right away, the logging below may overwrite it */
+    saved_errno = errno;
+    gdbf = NULL;
+
+    if (close_rc != 0) {
+        /* Both gdbm_errno and errno are set, communicate the generic one */
+        LOGERR("ERROR: Failed to close gdbm file correctly: %s (errno: %d)\n",
+               gdbm_strerror(gdbm_errno), saved_errno);
+        return false;
+    }
+
+    return true;
+}
+
+/**
+ * Local function to be wrapped by inserting/modifying ones.
+ *
+ * Writes the two byte value for the key, replacing an existing entry. The
+ * key is stored including its terminating NUL byte.
+ *
+ * @param key keystring, must not be NULL or empty
+ * @param state the bool part of the value, stored as 'T' or 'F'
+ * @param type the fs entry type of the value
+ * @param keep_original_type if true the type parameter is ignored and the
+ *                           type already stored for the key is kept ('\0'
+ *                           if the key did not exist yet)
+ *
+ * @return true on success
+ *         false on failure
+ */
+bool add_b_t_wrapped(char *key, bool state, char type, bool keep_original_type) {
+    datum k, v;
+    int fcall_rc;
+    char s[2] = { 0, 0 };
+
+    if (key == NULL || key[0] == '\0') {
+        LOGERR("ERROR: No key given.\n");
+        return false;
+    }
+
+    k.dptr = key;
+    k.dsize = (int)strlen(key)+1;
+
+    v = gdbm_fetch(gdbf, k);
+
+    if ((v.dptr == NULL) && (gdbm_errno != GDBM_ITEM_NOT_FOUND)) {
+        LOGERR("ERROR: Failed to check key existence for key '%s': %s\n",
+               key, gdbm_strerror(gdbm_errno));
+        return false;
+    }
+
+    /*
+     * Build the record in the local buffer instead of patching the fetched
+     * one: only the old type byte is of interest, and a stored value shorter
+     * than two bytes must not be written beyond its end.
+     */
+    if (v.dptr != NULL) {
+        if (v.dsize >= 2) {
+            s[1] = v.dptr[1];
+        }
+        free(v.dptr);
+    }
+
+    s[0] = (state) ? 'T' : 'F';
+    if (!keep_original_type) {
+        s[1] = type;
+    }
+
+    v.dptr = s;
+    v.dsize = (int)sizeof(s);
+
+    fcall_rc = gdbm_store(gdbf, k, v, GDBM_REPLACE);
+
+    if (fcall_rc < 0) {
+        /* gdbm_errno is a gdbm error code, not an errno value */
+        LOGERR("ERROR: Failed store value for key '%s': %s\n",
+               key, gdbm_strerror(gdbm_errno));
+        return false;
+    }
+
+    return true;
+}
+
+
+/**
+ * Store state and type for the given key. An already existing entry is
+ * replaced, including its type.
+ *
+ * @param key key to store the value for
+ * @param state boolean part of the value
+ * @param type additional single byte information
+ *
+ * @return true on success
+ *         false on failure
+ */
+bool kv_add_bool_type(char *key, bool state, char type) {
+    /* The given type always overrides whatever is stored for the key */
+    const bool keep_original_type = false;
+
+    return add_b_t_wrapped(key, state, type, keep_original_type);
+}
+
+/**
+ * Set only the boolean part of the value for a key. The type stored for an
+ * existing key is left as it is; a key that does not exist yet is inserted
+ * with the type byte '\0'.
+ *
+ * @param key key to set
+ * @param value value to set
+ *
+ * @return true on success
+ *         false on failure
+ */
+bool kv_set_bool(char *key, bool value) {
+    /* The type argument is ignored because the stored type is kept */
+    const char ignored_type = 0;
+    const bool keep_original_type = true;
+
+    return add_b_t_wrapped(key, value, ignored_type, keep_original_type);
+}
+
+/**
+ * Read the boolean part of the value stored for a key.
+ *
+ * CAUTION: a missing key and a stored 'F' both yield false, so check
+ * separately whether the key exists!
+ *
+ * @param key the key string
+ * @return true if the first value byte is 'T'
+ *         false otherwise, also if no value could be read
+ */
+bool kv_get_bool(char *key) {
+    bool state = false;
+    char *value = kv_get_raw(key);
+
+    if (value != NULL) {
+        state = (value[0] == 'T');
+        free(value);
+    }
+
+    return state;
+}
+
+/**
+ * Read the type part of the value stored for a key.
+ *
+ * CAUTION: a missing key yields 0 just like an unknown type, so check
+ * separately whether the key exists!
+ *
+ * @param key the key string
+ * @return the second value byte, representing the type
+ *         0 if no value could be read
+ */
+char kv_get_type(char *key) {
+    char type = 0;
+    char *value = kv_get_raw(key);
+
+    if (value != NULL) {
+        type = value[1];
+        free(value);
+    }
+
+    return type;
+}
+
+
+/**
+ * Get the raw value. CAUTION: First check separately whether key exists!
+ *
+ * @param key the key string
+ * @return NULL if key is NULL or empty, no value is stored or the fetch
+ *           failed (a failure is logged)
+ *         otherwise pointer to the value bytes, which needs to be freed by
+ *           the caller after processing
+ */
+char *kv_get_raw(char *key) {
+    datum k, v;
+
+    if (key == NULL || key[0] == '\0') {
+        LOGERR("ERROR: No key given.\n");
+        return NULL;
+    }
+
+    k.dptr = key;
+    /* Keys are stored including the terminating NUL (see add_b_t_wrapped),
+     * the lookup has to use the same size or it never matches. */
+    k.dsize = (int)strlen(key)+1;
+
+    v = gdbm_fetch(gdbf, k);
+
+    /* gdbm_errno is only meaningful if nothing was returned */
+    if ((v.dptr == NULL) && (gdbm_errno != GDBM_ITEM_NOT_FOUND)) {
+        LOGERR("ERROR: Failed to fetch value for key '%s': %s\n",
+               key, gdbm_strerror(gdbm_errno));
+    }
+
+    return v.dptr;
+}
+
+
+/**
+ * Simple check if there's data for the key
+ *
+ * @param key the keystring
+ * @return true if an entry exists for the key
+ *         false if key is NULL or empty, no entry exists or the check
+ *           failed (a failure is logged)
+ */
+bool kv_entry_exists(char *key) {
+    datum k;
+
+    if (key == NULL || key[0] == '\0') {
+        LOGERR("ERROR: No key given.\n");
+        return false;
+    }
+
+    k.dptr = key;
+    /* Keys are stored including the terminating NUL (see add_b_t_wrapped),
+     * the lookup has to use the same size or it never matches. */
+    k.dsize = (int)strlen(key)+1;
+
+    if (gdbm_exists(gdbf, k) == 1) {
+        return true;
+    }
+    /* A key that is simply missing leaves gdbm_errno at GDBM_NO_ERROR */
+    if (gdbm_errno != GDBM_NO_ERROR) {
+        LOGERR("ERROR: Failed to verify existence of entry for key '%s': %s\n",
+               key, gdbm_strerror(gdbm_errno));
+    }
+    return false;
+}
+
+/**
+ * Fetch the first key of the storage, the starting point for walking all
+ * keys with kv_next_key().
+ *
+ * @return pointer to a key, needs to be freed manually
+ *         NULL in case of error (which is logged) or an empty storage
+ */
+char *kv_first_key() {
+    datum first = gdbm_firstkey(gdbf);
+
+    if (first.dptr != NULL) {
+        return first.dptr;
+    }
+
+    /* An empty storage is not an error and stays silent */
+    if (gdbm_errno != GDBM_ITEM_NOT_FOUND) {
+        LOGERR("ERROR: Request for first key failed: %s\n",
+               gdbm_strerror(gdbm_errno));
+    }
+
+    return NULL;
+}
+
+/**
+ * Wrapper to get the key following the given one in the storage
+ *
+ * @param key the key returned by the previous kv_first_key()/kv_next_key()
+ *            call; it is not freed here
+ * @return pointer to the next key, needs to be freed manually
+ *         NULL if key is NULL or empty, the end has been reached or an
+ *           error occurred (an error is logged)
+ */
+char *kv_next_key(char *key) {
+    datum k, nk;
+
+    if (key == NULL || key[0] == '\0') {
+        LOGERR("ERROR: No valid key given\n");
+        return NULL;
+    }
+
+    k.dptr = key;
+    /* Keys are stored including the terminating NUL (see add_b_t_wrapped);
+     * with a shorter size gdbm does not find the current key and the
+     * iteration ends early. */
+    k.dsize = (int)strlen(key)+1;
+
+    nk = gdbm_nextkey(gdbf, k);
+
+    if ((nk.dptr == NULL) && (gdbm_errno != GDBM_ITEM_NOT_FOUND)) {
+        LOGERR("ERROR: Request for next key after '%s' failed: %s\n",
+               key, gdbm_strerror(gdbm_errno));
+    }
+
+    return nk.dptr;
+}
+
+/**
+ * Dump the storage data to the given output stream in incomplete JSON format
+ *
+ * One line is written per key. Keys whose value cannot be read or is shorter
+ * than the expected two bytes are reported via the error log instead.
+ *
+ * @param out the file stream, if NULL stdout is used
+ */
+void kv_dump(FILE *out) {
+    datum k, nk, v;
+
+    if (out == NULL) {
+        out = stdout;
+    }
+
+    k = gdbm_firstkey(gdbf);
+    while (k.dptr != NULL) {
+        v = gdbm_fetch(gdbf, k);
+
+        if (v.dptr != NULL) {
+            /* Both value bytes are printed, make sure they are there */
+            if (v.dsize >= 2) {
+                fprintf(out, "{k='%s', v='%c%c'},\n", k.dptr, v.dptr[0], v.dptr[1]);
+            } else {
+                LOGERR("ERROR: Value for '%s' is shorter than two bytes\n",
+                       k.dptr);
+            }
+            free(v.dptr);
+        } else if (gdbm_errno == GDBM_ITEM_NOT_FOUND) {
+            fprintf(out, "{k='%s', v=NULL},\n", k.dptr);
+        } else {
+            LOGERR("ERROR: Failed to get value for '%s': %s\n",
+                   k.dptr, gdbm_strerror(gdbm_errno));
+        }
+
+        /* The next key has to be requested before the current one is freed */
+        nk = gdbm_nextkey(gdbf, k);
+        free(k.dptr);
+        k = nk;
+    }
+}