aboutsummaryrefslogtreecommitdiff
path: root/src/directory_scanner.c
diff options
context:
space:
mode:
authorThorsten Töpper <atsutane@freethoughts.de>2026-02-18 21:51:53 +0100
committerThorsten Töpper <atsutane@freethoughts.de>2026-02-18 21:51:53 +0100
commitd513977a3566b14d9357906615d045d71741537f (patch)
tree3e707d2de9da71d98650fa8bb1b92ed11ab724ba /src/directory_scanner.c
parenteed2d1323441861f2d41f0ecc0a72fcc9190fa5f (diff)
downloadduplicate_finder-d513977a3566b14d9357906615d045d71741537f.tar.gz
duplicate_finder-d513977a3566b14d9357906615d045d71741537f.tar.bz2
squash initial implementation
Diffstat (limited to 'src/directory_scanner.c')
-rw-r--r--src/directory_scanner.c211
1 files changed, 211 insertions, 0 deletions
diff --git a/src/directory_scanner.c b/src/directory_scanner.c
new file mode 100644
index 0000000..27627d5
--- /dev/null
+++ b/src/directory_scanner.c
@@ -0,0 +1,211 @@
+/* SPDX-License-Identifier: Apache-2.0 */
+
+/**
+ * Copyright 2026 Thorsten Töpper
+ *
+ * @file directory_scanner.c
+ *
+ * vim:ts=4:sw=4:expandtab
+ */
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <unistd.h>
+#include <stdbool.h>
+#include <stdint.h>
+#include <errno.h>
+#include <sys/stat.h>
+#include <dirent.h>
+
+#include "options.h"
+#include "directory_scanner.h"
+#include "kv_manager.h"
+#include "trace_macros.h"
+
+
+/*=========== DEFINES, CONSTANTS AND TYPES ===========*/
+
+/* One node of a singly linked LIFO list of directory paths that still have to
+ * be scanned. */
+struct path_stack {
+ /* Node below this one on the stack; NULL for the bottom node. */
+ struct path_stack *next;
+ /* Path string handed to pstack_push(); the node stores the pointer only,
+  * the string itself is not copied. */
+ char *fullpath;
+};
+
+/*=========== GLOBAL VARIABLES ===========*/
+/* Top of the stack of directories waiting to be scanned; NULL when the stack is
+ * empty. Modified by pstack_push() and pstack_pop(). */
+struct path_stack *pstack = NULL;
+
+
+/*=========== FUNCTIONS ===========*/
+
+/* Forward declarations of the path stack helpers defined below. */
+int pstack_push(char *s);
+/* (void) makes this a real prototype, so a call that passes arguments by
+ * mistake is diagnosed by the compiler. */
+char *pstack_pop(void);
+
+
+/**
+ * Put a path on top of the global path stack.
+ *
+ * The pointer itself is stored, the string is not duplicated.
+ *
+ * @param s  Path to store, must be a non-empty string.
+ * @return   0 on success
+ *          -1 if s is NULL/empty or the stack node could not be allocated
+ */
+inline int pstack_push(char *s) {
+    struct path_stack *entry = NULL;
+
+    /* Reject a missing or empty path before touching the stack. */
+    if (s == NULL || *s == '\0') {
+        LOGERR("ERROR: Empty path\n");
+        return -1;
+    }
+
+    DBGTRC("DEBUG: push %s to pstack\n", s);
+
+    entry = calloc(1, sizeof(struct path_stack));
+    if (entry == NULL) {
+        LOGERR("ERROR: Failed to allocate 16bytes...\n");
+        return -1;
+    }
+
+    /* Link the new node in front of the current top. */
+    entry->fullpath = s;
+    entry->next = pstack;
+    pstack = entry;
+
+    return 0;
+}
+
+
+/**
+ * Remove the top node from the global path stack.
+ *
+ * The node is freed here; the path string it referenced is handed back to the
+ * caller unchanged.
+ *
+ * @return  The path stored in the removed node, or NULL if the stack is empty.
+ */
+inline char *pstack_pop() {
+    struct path_stack *top = pstack;
+    char *path = NULL;
+
+    if (top == NULL) {
+        return NULL;
+    }
+
+    DBGTRC("DEBUG: pop %s from pstack\n", top->fullpath);
+
+    /* Unlink the node first, then release it and return its payload. */
+    pstack = top->next;
+    path = top->fullpath;
+    free(top);
+
+    return path;
+}
+
+
+/**
+ * Traverse the directory tree from the given starting point and hand every
+ * directory found to process_directory(), which records the entries in the
+ * key:value storage.
+ *
+ * The starting point is processed first; afterwards directories are taken from
+ * the path stack until it is empty. Paths already on the stack when this
+ * function is called are processed as well.
+ *
+ * @param starting_point The directory where the travel begins. If NULL or empty the current
+ *                       path "." is used. The string is copied, the caller keeps ownership.
+ * @return 0 on success
+ *         <0 on failure (allocation failure or process_directory() failed). Paths that
+ *            are still on the stack at that point are left there.
+ */
+int traverse_directory_tree(const char *starting_point) {
+    const char *start = starting_point;
+    char *tmp = NULL;
+    size_t start_length = 0;
+    int rc = 0;
+
+    if (start == NULL || start[0] == '\0') {
+        LOGERR("WARNING: No starting point given, begin at \".\"\n");
+        start = ".";
+    }
+
+    /* Work on a heap copy so every path handled by the loop below, including
+     * the ones popped from the stack, can be released the same way. */
+    start_length = strlen(start);
+    if ((tmp = malloc(start_length + 1)) == NULL) {
+        LOGERR("ERROR: Failed to allocate memory for the starting point.\n");
+        return -1;
+    }
+    memcpy(tmp, start, start_length + 1);
+
+    /* In case there are still entries on an existing pstack, take them with you */
+    while (tmp != NULL) {
+        DBGTRC("DEBUG: process directory %s next\n", tmp);
+        rc = process_directory(tmp);
+        /* The path is not needed anymore, whether processing worked or not. */
+        free(tmp);
+        tmp = NULL;
+        if (rc < 0) {
+            return -1;
+        }
+        tmp = pstack_pop();
+    }
+
+    return 0;
+}
+
+/**
+ * Read one directory and record its content in the key:value storage.
+ *
+ * Every regular file and sub-directory that is not yet known to the storage is
+ * added with its flag set to false. Sub-directories are additionally pushed onto
+ * the path stack. Other entry types are ignored. At the end the directory itself
+ * is flagged as true.
+ *
+ * Entries whose name starts with '.' are skipped unless
+ * option_show_hidden_entries is set. The special entries "." and ".." are
+ * always skipped; pushing them would make the scan re-enter the same directory
+ * and climb into the parent.
+ *
+ * @param path Path of the directory to read, at most 4096 bytes long. The string
+ *             is neither modified nor freed.
+ *
+ * @return 0 on success
+ *         -1 on failure (no/too long path, allocation failure, directory could
+ *            not be opened, sub-directory could not be pushed)
+ */
+int process_directory(char *path) {
+    enum { MAX_PATH_LENGTH = 4096 };
+    char *fullpath = NULL, *fname_in_path = NULL, *stack_entry = NULL;
+    char type = 0;
+    size_t path_length = 0, prefix_length = 0, fullpath_size = 0;
+    size_t name_capacity = 0, entry_length = 0;
+    int rc = 0, written = 0, saved_errno = 0;
+    DIR *dir = NULL;
+    struct dirent *de = NULL;
+    struct stat stat_res;
+
+    /* February 2026: Modern filesystems usually have their maximal fullpath length
+     * at 4096 bytes. At least to my knowledge, so if this changes the checks need
+     * to be corrected. The scan is bounded slightly above the limit so that an
+     * oversized path is detected without walking an arbitrarily long string.
+     */
+    if (path == NULL || ((path_length = strnlen(path, MAX_PATH_LENGTH + 4)) == 0)) {
+        LOGERR("ERROR: No path given.\n");
+        return -1;
+    }
+
+    if (path_length > MAX_PATH_LENGTH) {
+        LOGERR("ERROR: path longer than 4096 byte.\n");
+        return -1;
+    }
+
+    /* 256 byte max filename + path separator + \0 */
+    fullpath_size = path_length + 258;
+    if ((fullpath = calloc(fullpath_size, sizeof(char))) == NULL) {
+        LOGERR("ERROR: Failed to allocate memory for fullpath.\n");
+        return -1;
+    }
+
+    /* Build "<path><PATH_SEP>" once; inside the loop only the part behind the
+     * separator is rewritten for each entry. */
+    memcpy(fullpath, path, path_length);
+    prefix_length = path_length;
+    if (path[path_length - 1] != PATH_SEP) {
+        fullpath[prefix_length] = PATH_SEP;
+        prefix_length++;
+    }
+    fname_in_path = fullpath + prefix_length;
+    name_capacity = fullpath_size - prefix_length;
+
+    if ((dir = opendir(path)) == NULL) {
+        /* Keep errno before any other library call may change it. */
+        saved_errno = errno;
+        LOGERR("ERROR: Failed to open directory '%s': %s (errno %d)\n",
+                path, strerror(saved_errno), saved_errno);
+        free(fullpath);
+        return -1;
+    }
+
+    while ((de = readdir(dir)) != NULL) {
+        if (de->d_name[0] == '.') {
+            if (option_show_hidden_entries == false) {
+                continue;
+            }
+            /* Never follow the self and parent links, even when hidden
+             * entries are requested. */
+            if (strcmp(de->d_name, ".") == 0 || strcmp(de->d_name, "..") == 0) {
+                continue;
+            }
+        }
+
+        written = snprintf(fname_in_path, name_capacity, "%s", de->d_name);
+        if (written < 0 || (size_t)written >= name_capacity) {
+            LOGERR("ERROR: Name of an entry in '%s' is too long, skipping it.\n", path);
+            continue;
+        }
+        DBGTRC("DEBUG: fullpath: '%s'\n", fullpath);
+
+        if (lstat(fullpath, &stat_res) != 0) {
+            saved_errno = errno;
+            LOGERR("ERROR: lstat call on '%s' failed: %s (errno %d)\n",
+                    fullpath, strerror(saved_errno), saved_errno);
+            continue;
+        }
+
+        if (S_ISDIR(stat_res.st_mode)) {
+            type = 'D';
+            /* The stack keeps the pointer it is given, so it needs its own
+             * copy; fullpath is overwritten by the next entry. */
+            entry_length = strlen(fullpath);
+            if ((stack_entry = malloc(entry_length + 1)) == NULL) {
+                LOGERR("ERROR: memory allocation failed\n");
+                rc = -1;
+                break;
+            }
+            memcpy(stack_entry, fullpath, entry_length + 1);
+            if (pstack_push(stack_entry) != 0) {
+                /* The stack did not take the copy, release it here. */
+                free(stack_entry);
+                rc = -1;
+                break;
+            }
+        } else if (S_ISREG(stat_res.st_mode)) {
+            type = 'F';
+        } else {
+            DBGTRC("DEBUG: Not a file or directory ... ignoring.\n");
+            continue;
+        }
+
+        /* Don't overwrite earlier runs */
+        if (!kv_entry_exists(fullpath)) {
+            /* Ignore errors, missing entries shall show up in the error log
+             * and require manual intervention */
+            kv_add_bool_type(fullpath, false, type);
+        }
+    }
+
+    /* Single exit path for the resources taken above. */
+    free(fullpath);
+    closedir(dir);
+
+    if (rc != 0) {
+        return rc;
+    }
+
+    if (!kv_entry_exists(path)) {
+        /* NOTE(review): this looks at the first character of the whole path,
+         * not of its last component, so e.g. a starting point of "." is not
+         * recorded when hidden entries are disabled - confirm this is intended. */
+        if (path[0] == '.' && option_show_hidden_entries == false) {
+            return 0;
+        }
+        kv_add_bool_type(path, true, 'D');
+    } else {
+        kv_set_bool(path, true);
+    }
+
+    return 0;
+}
+
+