diff options
| -rw-r--r-- | include/database_interaction.h | 3 | ||||
| -rw-r--r-- | src/database_interaction.c | 64 | ||||
| -rw-r--r-- | src/duplicate_finder.c | 66 |
3 files changed, 121 insertions, 12 deletions
diff --git a/include/database_interaction.h b/include/database_interaction.h index 9ae0da7..3414a4b 100644 --- a/include/database_interaction.h +++ b/include/database_interaction.h @@ -31,4 +31,7 @@ int dbi_insert_fileinfo(struct df_fileinfo *fi); int dbi_update_fileinfo_last_seen(int64_t id); int dbi_update_fileinfo_complete(struct df_fileinfo *fi, int64_t existing_id); + +int dbi_print_fileinfo_resolved(FILE *fd); + #endif diff --git a/src/database_interaction.c b/src/database_interaction.c index 0dd729d..37ce32c 100644 --- a/src/database_interaction.c +++ b/src/database_interaction.c @@ -134,6 +134,8 @@ void dbi_close() { LOCAL_FINALIZE(select_fileinfo_by_filename_id); LOCAL_FINALIZE(select_fileinfo_by_path_filename_ids); LOCAL_FINALIZE(select_fileinfo_by_hash_id); + LOCAL_FINALIZE(select_fileinfo_complete_table); + LOCAL_FINALIZE(select_fileinfo_complete_table_resolved); LOCAL_FINALIZE(insert_filename); LOCAL_FINALIZE(insert_pathname); @@ -220,8 +222,8 @@ int prepare_statements() { /* TODO: so far the only query with JOINs or masking it in another way? * Many years since */ - LOCAL_PREP_STMT("SELECT paths.path_name, filenames.name, hashes.blake2, hashes.sha256, hashes.512, fileinfo.size, fileinfo.last_seen, fileinfo.stat_struct FROM fileinfo INNER JOIN paths ON fileinfo.p_id = paths.id INNER JOIN filenames ON fileinfo.fn_id = filenames.id INNER JOIN hashes ON fileinfo.h_id = hashes.id WHERE fileinfo.id = ? ;", &select_fileinfo_by_id_resolved); - LOCAL_PREP_STMT("SELECT paths.path_name, filenames.name, hashes.blake2, hashes.sha256, hashes.512, fileinfo.size, fileinfo.last_seen, fileinfo.stat_struct FROM fileinfo INNER JOIN paths ON fileinfo.p_id = paths.id INNER JOIN filenames ON fileinfo.fn_id = filenames.id INNER JOIN hashes ON fileinfo.h_id = hashes.id ;", &select_fileinfo_complete_table_resolved); + LOCAL_PREP_STMT("SELECT paths.pathname, filenames.name, hashes.blake2, hashes.sha256, hashes.sha512, fileinfo.size, fileinfo.last_seen, fileinfo.stat_struct FROM fileinfo INNER JOIN paths ON fileinfo.p_id = paths.id INNER JOIN filenames ON fileinfo.fn_id = filenames.id INNER JOIN hashes ON fileinfo.h_id = hashes.id WHERE fileinfo.id = ? ;", &select_fileinfo_by_id_resolved); + LOCAL_PREP_STMT("SELECT paths.pathname, filenames.name, hashes.blake2, hashes.sha256, hashes.sha512, fileinfo.size, fileinfo.last_seen, fileinfo.stat_struct FROM fileinfo INNER JOIN paths ON fileinfo.p_id = paths.id INNER JOIN filenames ON fileinfo.fn_id = filenames.id INNER JOIN hashes ON fileinfo.h_id = hashes.id ;", &select_fileinfo_complete_table_resolved); LOCAL_PREP_STMT("SELECT p_id, fn_id, h_id, size, last_seen, stat_struct FROM fileinfo ;", &select_fileinfo_complete_table); /* INSERT */ @@ -875,6 +877,64 @@ int dbi_insert_fileinfo(struct df_fileinfo *fi) { return rc; }; + +/** + * Print the database content to the given filedescriptor in a CSV form which can be processed with grep awk etc. + * + * @param out output descriptor, if NULL stdout is used. + * @return 0 on success, + * -1 on failure + */ +int dbi_print_fileinfo_resolved(FILE *out) { + int rc = 0; + int64_t i; + int strc = 0; + FILE *fd = out; + const unsigned char *txt = NULL; + sqlite3_stmt *st = select_fileinfo_complete_table_resolved; + + DBCONN_CHECK(-1); + + sqlite3_reset(st); + + + if (fd == NULL) { fd = stdout; } + + /* We ignore the struct_stat and last_seen columns */ + + do { + strc = sqlite3_step(st); + + if (strc == SQLITE_DONE) { + break; + } + + if (strc != SQLITE_ROW) { + LOGERR("ERROR: Failed step: %s\n", sqlite3_errmsg(dbconn)); + rc = -1; + break; + } + + txt = sqlite3_column_text(st, 0); /* paths.pathname */ + fprintf(fd, "%s/", txt); + txt = sqlite3_column_text(st, 1); /* filenames.name */ + fprintf(fd, "%s;", txt); + txt = sqlite3_column_text(st, 2); /* hashes.blake2 */ + fprintf(fd, "%s;", txt); + txt = sqlite3_column_text(st, 3); /* hashes.sha256 */ + fprintf(fd, "%s;", txt); + txt = sqlite3_column_text(st, 4); /* hashes.sha512 */ + fprintf(fd, "%s;", txt); + i = (int64_t) sqlite3_column_int64(st, 5); /* fileinfo.size */ + fprintf(fd, "%ld\n", i); + + } while (strc == SQLITE_ROW); + + sqlite3_reset(st); + + return rc; +} + #if 0 *select_fileinfo_by_id, *select_fileinfo_by_id_resolved, diff --git a/src/duplicate_finder.c b/src/duplicate_finder.c index 714fd96..dae7832 100644 --- a/src/duplicate_finder.c +++ b/src/duplicate_finder.c @@ -28,22 +28,47 @@ /*=========== GLOBAL VARIABLES ===========*/ /*=========== FUNCTIONS ===========*/ +int analyze_db_content(); +int scan (const char *path); -int main(int argc, char **argv) { - int path_index = 1; +/** + * The wrapper around automated DB content analysis. + * + * @return EXIT_SUCCESS on success + * EXIT_FAILURE on failure + */ +int analyze_db_content() { - if (argc > 1) { - path_index = parse_arguments(argc, argv); + if ( ! dbi_open(option_sqlite_db_name) ) { + return EXIT_FAILURE; } - /* TODO: name as option */ - if ( ! kv_open_storage("/tmp/duplicate_finder.gdbm") ) { + + /* TODO: Implementation of several SQL queries... not in the mood */ + dbi_print_fileinfo_resolved(stdout); + + dbi_close(); + return EXIT_SUCCESS; +} + + +/** + * Scan the given path... + * + * @return EXIT_SUCCESS on success + * EXIT_FAILURE on failure + */ + +int scan(const char *path) { + if ( ! kv_open_storage(option_gdbm_db_name) ) { return EXIT_FAILURE; } - /* TODO: name as option */ - dbi_open("/tmp/duplicate_finder.sqlite"); - traverse_directory_tree((path_index == argc) ? argv[path_index] : "."); + if ( ! dbi_open(option_sqlite_db_name) ) { + return EXIT_FAILURE; + } + + traverse_directory_tree(path); #ifdef DEBUGBUILD kv_dump(stdout); @@ -58,7 +83,28 @@ int main(int argc, char **argv) { /* TODO: Implement signal handlers and add the close for sqlite and gdbm dbs there */ kv_close_storage(); dbi_close(); - return EXIT_SUCCESS; } +int main(int argc, char **argv) { + int path_index = 1; + + if (argc > 1) { + path_index = parse_arguments(argc, argv); + } else { + LOGERR("ERROR: Too few arguments given, see --help or man.\n"); + return EXIT_FAILURE; + } + + if (option_mode == MODE_SCAN) { + return scan((path_index == argc) ? argv[path_index] : "."); + } + + if (option_mode == MODE_ANALYZE_DB) { + return analyze_db_content(); + } + + LOGERR("ERROR: No proper modus operandi, the dev missed something.\n"); + return EXIT_FAILURE; +} + |
