aboutsummaryrefslogtreecommitdiff
path: root/src/duplicate_finder.c
blob: 8a058fdec19a57a5328b61f7ce2eca56c54a3015 (plain) (blame)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
/* SPDX-License-Identifier: Apache-2.0 */

/**
 * Copyright 2026 Thorsten Töpper
 *
 * Keep track of files across different paths or filesystems with a SQLite DB
 * and identify duplicates. This utility is meant for housekeeping when
 * spreading data across multiple filesystems or integrating old disks into
 * newer setups.
 *
 * The DB stores SHA512 and SHA256 hashes calculated with the OpenSSL library,
 * paths and filenames, and their corresponding stat() filesystem data.
 *
 * @file duplicate_finder.c
 *
 * vim:ts=4:sw=4:expandtab
 */

#include <stdio.h>
#include <stdlib.h>

#include "trace_macros.h"
#include "options.h"
#include "kv_manager.h"
#include "directory_scanner.h"
#include "database_interaction.h"

/*=========== DEFINES, CONSTANTS AND TYPES ===========*/

/*=========== GLOBAL VARIABLES ===========*/

/*=========== FUNCTIONS ===========*/
/*
 * Forward declarations of the two mode entry points defined in this file.
 * "(void)" makes the first one a real prototype: before C23 an empty
 * parameter list leaves the parameters unspecified, so the compiler would
 * not diagnose a call that passes arguments.
 */
int analyze_db_content(void);
int scan(const char *path);

/**
 * The wrapper around automated DB content analysis.
 *
 * Opens the database named by option_sqlite_db_name, prints the resolved
 * file information followed by the list of identical hashes to stdout and
 * closes the database again.
 *
 * @return EXIT_SUCCESS on success
 *         EXIT_FAILURE if the database could not be opened or the search
 *         for identical hashes failed
 */
int analyze_db_content(void) {
    int result = EXIT_FAILURE;

    if ( ! dbi_open(option_sqlite_db_name) ) {
        return EXIT_FAILURE;
    }

    /* TODO: Implement further SQL queries for the analysis. */
    /* NOTE(review): the outcome of this call is not evaluated; check whether
     * dbi_print_fileinfo_resolved() reports errors that should be handled. */
    dbi_print_fileinfo_resolved(stdout);

    /* Constant banner, no formatting needed. */
    fputs("\n\n---- IDENTICAL HASHES ----\n\n", stdout);

    if (dbi_print_identical_hashes(stdout) < 0) {
        LOGERR("ERROR: Identification of duplicates via hashes failed.\n");
    } else {
        result = EXIT_SUCCESS;
    }

    /* Single exit path: the database is closed on success and on failure. */
    dbi_close();
    return result;
}


/**
 * Scan the given path.
 *
 * Opens the key-value storage (option_gdbm_db_name) and the database
 * (option_sqlite_db_name), runs traverse_directory_tree() on @p path,
 * then process_gdbm_content(), and finally closes both stores. Debug builds
 * additionally dump the key-value storage to stdout before and after
 * process_gdbm_content().
 *
 * @param path directory handed to traverse_directory_tree()
 *
 * @return EXIT_SUCCESS on success
 *         EXIT_FAILURE if one of the two stores could not be opened
 */
int scan(const char *path) {
    if ( ! kv_open_storage(option_gdbm_db_name) ) {
        return EXIT_FAILURE;
    }

    if ( ! dbi_open(option_sqlite_db_name) ) {
        /* The key-value storage is already open at this point; release it
         * instead of leaking it on the error path. */
        kv_close_storage();
        return EXIT_FAILURE;
    }

    /* NOTE(review): the results of traverse_directory_tree() and
     * process_gdbm_content() are not evaluated; confirm whether they can
     * report failures that should turn into EXIT_FAILURE. */
    traverse_directory_tree(path);

#ifdef DEBUGBUILD
    kv_dump(stdout);
#endif

    process_gdbm_content();

#ifdef DEBUGBUILD
    kv_dump(stdout);
#endif

    /* TODO: Implement signal handlers and add the close for sqlite and gdbm dbs there */
    kv_close_storage();
    dbi_close();
    return EXIT_SUCCESS;
}

/**
 * Program entry point: evaluate the command line and run the selected mode.
 *
 * @param argc number of command line arguments
 * @param argv the command line arguments
 *
 * @return the result of scan() or analyze_db_content() for the respective
 *         mode, EXIT_FAILURE if no arguments were given or no known mode
 *         is selected
 */
int main(int argc, char **argv) {
    int first_path;

    /* Guard: without any argument there is nothing to do. */
    if (argc <= 1) {
        LOGERR("ERROR: Too few arguments given, see --help or man.\n");
        return EXIT_FAILURE;
    }

    /* NOTE(review): presumably the index of the first non-option argument,
     * equal to argc when there is none; confirm against parse_arguments(). */
    first_path = parse_arguments(argc, argv);

    DBGTRC("Database:  %s\n", option_sqlite_db_name);
    DBGTRC("KVStorage: %s\n", option_gdbm_db_name);

    if (option_mode == MODE_SCAN) {
        /* Fall back to the current directory when no path was given. */
        const char *target = ".";

        if (first_path != argc) {
            target = argv[first_path];
        }
        return scan(target);
    }

    if (option_mode != MODE_ANALYZE_DB) {
        LOGERR("ERROR: No proper modus operandi, the dev missed something.\n");
        return EXIT_FAILURE;
    }

    return analyze_db_content();
}