/* SPDX-License-Identifier: Apache-2.0 */ /** * Copyright 2026 Thorsten Töpper * * Keep track of files across different paths or filesystems with a sqlite db * and identify duplicates. This utility should be used for housekeeping when * spreading data across multiple FS / integrate old disks into newer setups. * * The DB stores SHA512 and SHA256 hashes calculated with the OpenSSL library, * path and filenames and their corresponding stat() FS data. * * @file duplicate_finder.c * * vim:ts=4:sw=4:expandtab */ #include #include "trace_macros.h" #include "options.h" #include "kv_manager.h" #include "directory_scanner.h" #include "database_interaction.h" /*=========== DEFINES, CONSTANTS AND TYPES ===========*/ /*=========== GLOBAL VARIABLES ===========*/ /*=========== FUNCTIONS ===========*/ int analyze_db_content(); int scan (const char *path); /** * The wrapper around automated DB content analysis. * * @return EXIT_SUCCESS on success * EXIT_FAILURE on failure */ int analyze_db_content() { if ( ! dbi_open(option_sqlite_db_name) ) { return EXIT_FAILURE; } /* TODO: Implementation of several SQL queries... not in the mood */ dbi_print_fileinfo_resolved(stdout); printf("\n\n---- IDENTICAL HASHES ----\n\n"); if (dbi_print_identical_hashes(stdout) < 0) { LOGERR("ERROR: Identification of duplicates via hashes failed.\n"); dbi_close(); return EXIT_FAILURE; } printf("\n\n---- IDENTICAL FILENAMES ----\n\n"); if (dbi_print_identical_filenames(stdout) < 0) { LOGERR("ERROR: Identification of duplicates via filenames failed.\n"); dbi_close(); return EXIT_FAILURE; } dbi_close(); return EXIT_SUCCESS; } /** * Scan the given path... * * @return EXIT_SUCCESS on success * EXIT_FAILURE on failure */ int scan(const char *path) { if ( ! kv_open_storage(option_gdbm_db_name) ) { return EXIT_FAILURE; } if ( ! dbi_open(option_sqlite_db_name) ) { return EXIT_FAILURE; } traverse_directory_tree(path); #ifdef DEBUGBUILD kv_dump(stdout); #endif process_gdbm_content(); #ifdef DEBUGBUILD kv_dump(stdout); #endif /* TODO: Implement signal handlers and add the close for sqlite and gdbm dbs there */ kv_close_storage(); dbi_close(); return EXIT_SUCCESS; } int main(int argc, char **argv) { int path_index = 1; if (argc > 1) { path_index = parse_arguments(argc, argv); } else { LOGERR("ERROR: Too few arguments given, see --help or man.\n"); return EXIT_FAILURE; } DBGTRC("Database: %s\n", option_sqlite_db_name); DBGTRC("KVStorage: %s\n", option_gdbm_db_name); if (option_mode == MODE_SCAN) { return scan((path_index == argc) ? "." : argv[path_index] ); } if (option_mode == MODE_ANALYZE_DB) { return analyze_db_content(); } LOGERR("ERROR: No proper modus operandi, the dev missed something.\n"); return EXIT_FAILURE; }