From eed2d1323441861f2d41f0ecc0a72fcc9190fa5f Mon Sep 17 00:00:00 2001 From: Thorsten Töpper Date: Sat, 7 Feb 2026 21:43:17 +0100 Subject: file processor: Copied from my small-utils project --- src/file_processor.c | 292 +++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 292 insertions(+) create mode 100644 src/file_processor.c (limited to 'src/file_processor.c') diff --git a/src/file_processor.c b/src/file_processor.c new file mode 100644 index 0000000..1cfed46 --- /dev/null +++ b/src/file_processor.c @@ -0,0 +1,292 @@ +/* SPDX-License-Identifier: Apache-2.0 */ + +/** + * Copyright 2026 Thorsten Töpper + * + * @file file_processor.c + * + * vim:ts=4:sw=4:expandtab + */ +#include +#include +#include +#include +#include +#include +#include + + +/* https://docs.openssl.org/master/man3/EVP_DigestInit/ */ +#include + +#include "file_processor.h" +#include "trace_macros.h" + + +/*=========== DEFINES, CONSTANTS AND TYPES ===========*/ + +#ifndef BUFSIZE4MIB +/* Personnal observation: dd bs=4M usually gets good performance regardless of SSD/HDD or USB 2.x/3.x */ +#define BUFSIZE4MIB 4194304 +#endif + + +/** + * Objects from the OpenSSL library + * Improved later code readability by placing those together and manage outside the file handling code + */ +struct df_md_components { + EVP_MD_CTX *mdctx_blake2; + EVP_MD_CTX *mdctx_sha256; + EVP_MD_CTX *mdctx_sha512; +}; + + +/*=========== GLOBAL VARIABLES ===========*/ + +/** TODO: Should be set via command line parameter, move to handling when implementing + */ +size_t glbl_bufsize = BUFSIZE4MIB; + +const EVP_MD *glbl_md_blake2 = NULL; +const EVP_MD *glbl_md_sha256 = NULL; +const EVP_MD *glbl_md_sha512 = NULL; + + + +/*=========== FUNCTIONS ===========*/ +struct df_md_components *init_md_components(); +void destroy_md_components(struct df_md_components *pkg); + + +/** + * Prepare a df_md_components struct for active usage. + */ +inline struct df_md_components *init_md_components() { + struct df_md_components *pkg = NULL; + + if ((pkg=calloc(1,sizeof(struct df_md_components))) == NULL) { + LOGERR("ERROR: Failed to allocate heap memory for a struct df_md_components errno %d: %s\n", + errno, strerror(errno)); + return NULL; + } + + /* TODO: research whether performance difference is really worth this additional complexity, + * the util is processing files from storage as single thread, not tons of <1KiB blobs in parallel. + * So a few nano- or microseconds are mostly insignificant compared to the I/O throttle. */ + if (glbl_md_blake2 == NULL) { + glbl_md_blake2 = EVP_blake2b512(); + if (glbl_md_blake2 == NULL) { + LOGERR("ERROR: Failed to fetch EVP_MD for BLAKE2\n"); + return NULL; + } + } + if (glbl_md_sha256 == NULL) { + glbl_md_sha256 = EVP_sha256(); + if (glbl_md_sha256 == NULL) { + LOGERR("ERROR: Failed to fetch EVP_MD for SHA2_256\n"); + return NULL; + } + } + if (glbl_md_sha512 == NULL) { + glbl_md_sha512 = EVP_sha512(); + if (glbl_md_sha512 == NULL) { + LOGERR("ERROR: Failed to fetch EVP_MD for SHA2_512\n"); + return NULL; + } + } + + /* Create the contexts */ + if ((pkg->mdctx_blake2 = EVP_MD_CTX_new()) == NULL) { + LOGERR("ERROR: Failed to create context for BLAKE2\n"); + free(pkg); + return NULL; + } + + if ((pkg->mdctx_sha256 = EVP_MD_CTX_new()) == NULL) { + LOGERR("ERROR: Failed to create context for SHA256\n"); + EVP_MD_CTX_free(pkg->mdctx_blake2); + free(pkg); + return NULL; + } + + if ((pkg->mdctx_sha512 = EVP_MD_CTX_new()) == NULL) { + LOGERR("ERROR: Failed to create context for SHA512\n"); + EVP_MD_CTX_free(pkg->mdctx_blake2); + EVP_MD_CTX_free(pkg->mdctx_sha256); + free(pkg); + return NULL; + } + + /* Initialize them */ + if (EVP_DigestInit_ex2(pkg->mdctx_blake2, glbl_md_blake2, NULL) != 1) { + LOGERR("ERROR: Failed to initialize BLAKE2 context\n"); + EVP_MD_CTX_free(pkg->mdctx_blake2); + EVP_MD_CTX_free(pkg->mdctx_sha256); + EVP_MD_CTX_free(pkg->mdctx_sha512); + free(pkg); + return NULL; + } + + if (EVP_DigestInit_ex2(pkg->mdctx_sha256, glbl_md_sha256, NULL) != 1) { + LOGERR("ERROR: Failed to initialize SHA256 context\n"); + EVP_MD_CTX_free(pkg->mdctx_blake2); + EVP_MD_CTX_free(pkg->mdctx_sha256); + EVP_MD_CTX_free(pkg->mdctx_sha512); + free(pkg); + return NULL; + } + + if (EVP_DigestInit_ex2(pkg->mdctx_sha512, glbl_md_sha512, NULL) != 1) { + LOGERR("ERROR: Failed to initialize SHA512 context\n"); + EVP_MD_CTX_free(pkg->mdctx_blake2); + EVP_MD_CTX_free(pkg->mdctx_sha256); + EVP_MD_CTX_free(pkg->mdctx_sha512); + free(pkg); + return NULL; + } + + return pkg; +} + +/** + * Free all memory related to the given struct including itself + * @param pkg the struct to destroy + */ +inline void destroy_md_components(struct df_md_components *pkg) { + if (pkg == NULL) { + return; + } + if (pkg->mdctx_blake2 != NULL) { + EVP_MD_CTX_free(pkg->mdctx_blake2); + } + if (pkg->mdctx_sha256 != NULL) { + EVP_MD_CTX_free(pkg->mdctx_sha256); + } + if (pkg->mdctx_sha512 != NULL) { + EVP_MD_CTX_free(pkg->mdctx_sha512); + } + free(pkg); +} + + +/** + * Read the file defined by path and name in the argument struct, and add stat + * results and binary represented hashes of the file into the struct. + * + * @param info struct contains the path of the file to read, results will be + * stored there. + * + * @return 0 on success + * -1 on failure + */ +int process_file(struct df_fileinfo *info) { + FILE *fdin = NULL; + char fullpath[4096]; + unsigned char buffer[glbl_bufsize]; + size_t bytes_read; + struct df_md_components *ctx_pkg; + bool error_in_loop = false; + + unsigned char md_val[EVP_MAX_MD_SIZE]; + unsigned int md_len; + + if (info == NULL || info->name == NULL || info->path == NULL || + info->name[0] == '\0' || info->path[0] == '\0') { + LOGERR("ERROR: Not enough information to construct a full path.\n"); + return -1; + } + + + if (snprintf(fullpath, 4096, "%s/%s", info->path, info->name) < 0) { + LOGERR("ERROR: Failed to print fullpath string into stack memory: %s (errno %d)\n", + strerror(errno), errno); + return -1; + } + + if (lstat(fullpath, &(info->statbuf)) == -1) { + LOGERR("ERROR: lstat() call failed for file %s: %s (errno %d)\n", + fullpath, strerror(errno), errno); + return -1; + } + + if ((info->statbuf.st_mode & S_IFMT) != S_IFREG) { + LOGERR("ERROR: Non-regular files are not processed.\n"); + return -1; + } + + if ((ctx_pkg = init_md_components()) == NULL) { + LOGERR("ERROR: Failed to initialize/create md contexts to be used with %s\n", + fullpath); + return -1; + } + + if ((fdin=fopen(fullpath, "rb")) == NULL) { + LOGERR("ERROR: Failed to open file '%s' %s\n", fullpath, strerror(errno)); + destroy_md_components(ctx_pkg); + return -1; + } + + /* TODO: proper fread related error handling */ + while (true) { + bytes_read = fread(buffer, sizeof(unsigned char), glbl_bufsize, fdin); + + if (EVP_DigestUpdate(ctx_pkg->mdctx_blake2, buffer, bytes_read) != 1) { + LOGERR("ERROR: Failed to update message digest BLAKE2 of file '%s'\n", fullpath); + error_in_loop = true; + break; + } + if (EVP_DigestUpdate(ctx_pkg->mdctx_sha256, buffer, bytes_read) != 1) { + LOGERR("ERROR: Failed to update message digest SHA256 of file '%s'\n", fullpath); + error_in_loop = true; + break; + } + if (EVP_DigestUpdate(ctx_pkg->mdctx_sha512, buffer, bytes_read) != 1) { + LOGERR("ERROR: Failed to update message digest SHA512 of file '%s'\n", fullpath); + error_in_loop = true; + break; + } + if (bytes_read != glbl_bufsize) { + if (feof(fdin) != 0) { + break; + } + if (ferror(fdin) != 0) { + LOGERR("ERROR: Failed to read from %s: %s (errno %d)\n", + fullpath, strerror(errno), errno); + error_in_loop = true; + break; + } + } + } + fclose(fdin); + fdin = NULL; + if (error_in_loop) { + destroy_md_components(ctx_pkg); + return -1; + } + + if (EVP_DigestFinal_ex(ctx_pkg->mdctx_blake2, md_val, &md_len) != 1) { + LOGERR("ERROR: Failed to finalize MD BLAKE2 of file '%s'\n", fullpath); + destroy_md_components(ctx_pkg); + return -1; + } + memcpy(info->blake2, md_val, md_len); + + if (EVP_DigestFinal_ex(ctx_pkg->mdctx_sha256, md_val, &md_len) != 1) { + LOGERR("ERROR: Failed to finalize MD SHA256 of file '%s'\n", fullpath); + destroy_md_components(ctx_pkg); + return -1; + } + memcpy(info->sha256, md_val, md_len); + + if (EVP_DigestFinal_ex(ctx_pkg->mdctx_sha512, md_val, &md_len) != 1) { + LOGERR("ERROR: Failed to finalize MD SHA512 of file '%s'\n", fullpath); + destroy_md_components(ctx_pkg); + return -1; + } + memcpy(info->sha512, md_val, md_len); + + return 0; +} + + -- cgit v1.3