/* SPDX-License-Identifier: Apache-2.0 */
/**
 * Copyright 2026 Thorsten Töpper
 *
 * Keep track of files across different paths or filesystems with a sqlite db
 * and identify duplicates. This utility should be used for housekeeping when
 * spreading data across multiple filesystems or integrating old disks into
 * newer setups.
 *
 * The DB stores SHA512 and SHA256 hashes calculated with the OpenSSL library,
 * path and filenames and their corresponding stat() FS data.
 *
 * @file duplicate_finder.c
 *
 * vim:ts=4:sw=4:expandtab
 */

/*
 * NOTE(review): the header names of the first two #include directives are
 * missing in this view (presumably system headers written in angle
 * brackets) -- restore them from the original file.
 */
#include
#include
#include "trace_macros.h"
#include "options.h"
#include "kv_manager.h"
#include "directory_scanner.h"
#include "database_interaction.h"

/*=========== DEFINES, CONSTANTS AND TYPES ===========*/

/*=========== GLOBAL VARIABLES ===========*/

/*=========== FUNCTIONS ===========*/

/* Forward declarations of the mode handlers used by the dispatch code below. */
int analyze_db_content();
int scan (const char *path);
int dump(int argc, char **argv, int pos);

/**
 * The wrapper around automated DB content analysis.
 *
 * Opens the database named by option_sqlite_db_name and writes the reports
 * to stdout: optionally the resolved file info (when
 * option_show_non_duplicates is set), then the entries with identical hashes,
 * then the entries with identical filenames. The database is closed again on
 * every path that follows a successful open.
 *
 * @return EXIT_SUCCESS on success
 *         EXIT_FAILURE on failure (database could not be opened, or one of
 *         the two duplicate reports returned a negative value)
 */
int analyze_db_content() {
    /* Nothing to close if the open itself failed. */
    if ( ! dbi_open(option_sqlite_db_name) ) {
        return EXIT_FAILURE;
    }

    /* Optional listing; its return value is not checked here. */
    if (option_show_non_duplicates) {
        dbi_print_fileinfo_resolved(stdout);
        printf("\n\n");
    }

    printf("---- IDENTICAL HASHES ----\n\n");
    /* A negative return value is treated as failure. */
    if (dbi_print_identical_hashes(stdout) < 0) {
        LOGERR("ERROR: Identification of duplicates via hashes failed.\n");
        dbi_close();
        return EXIT_FAILURE;
    }

    printf("\n\n---- IDENTICAL FILENAMES ----\n\n");
    /* Same convention as above: negative means failure. */
    if (dbi_print_identical_filenames(stdout) < 0) {
        LOGERR("ERROR: Identification of duplicates via filenames failed.\n");
        dbi_close();
        return EXIT_FAILURE;
    }

    dbi_close();
    return EXIT_SUCCESS;
}

/**
 * Dump requested database content.
 *
 * Fails with an error message when no parameter is left at index pos
 * (pos >= argc) and when the database named by option_sqlite_db_name cannot
 * be opened.
 *
 * @param argc The array size from main()
 * @param argv argv array from main()
 * @param pos  The position where to begin in the array
 *
 * @return EXIT_SUCCESS on success
 *         EXIT_FAILURE on failure
 */
int dump(int argc, char **argv, int pos) {
    int i = pos;

    /* At least one parameter must be present at or after pos. */
    if (pos >= argc) {
        LOGERR("ERROR: Missing parameter\n");
        return EXIT_FAILURE;
    }

    if ( ! dbi_open(option_sqlite_db_name) ) {
        return EXIT_FAILURE;
    }

    /*
     * NOTE(review): the text appears truncated at this point in this view.
     * The loop header below is incomplete, and the statements following it
     * (parse_arguments() call and the option_mode dispatch) presumably
     * belong to a later function such as main() whose beginning is not
     * visible here; no definition of scan() is visible either. Restore the
     * missing span from the original file before building.
     */
    for (i=pos; i 1) {
        /* path_index receives the value returned by parse_arguments(). */
        path_index = parse_arguments(argc, argv);
    } else {
        LOGERR("ERROR: Too few arguments given, see --help or man.\n");
        return EXIT_FAILURE;
    }

    DBGTRC("Database: %s\n", option_sqlite_db_name);
    DBGTRC("KVStorage: %s\n", option_gdbm_db_name);

    /* Scan mode: use "." when path_index equals argc, else argv[path_index]. */
    if (option_mode == MODE_SCAN) {
        return scan((path_index == argc) ? "." : argv[path_index] );
    }

    /* Dump mode: hand the argument vector and path_index on to dump(). */
    if (option_mode == MODE_DUMP) {
        return dump(argc, argv, path_index);
    }

    /* Analysis mode: run the duplicate reports on the database content. */
    if (option_mode == MODE_ANALYZE_DB) {
        return analyze_db_content();
    }

    /* Reached only when option_mode matched none of the modes tested above. */
    LOGERR("ERROR: No proper modus operandi, the dev missed something.\n");
    return EXIT_FAILURE;
}