aboutsummaryrefslogtreecommitdiff
path: root/src/file_processor.c
diff options
context:
space:
mode:
Diffstat (limited to 'src/file_processor.c')
-rw-r--r--src/file_processor.c292
1 files changed, 292 insertions, 0 deletions
diff --git a/src/file_processor.c b/src/file_processor.c
new file mode 100644
index 0000000..1cfed46
--- /dev/null
+++ b/src/file_processor.c
@@ -0,0 +1,292 @@
+/* SPDX-License-Identifier: Apache-2.0 */
+
+/**
+ * Copyright 2026 Thorsten Töpper
+ *
+ * @file file_processor.c
+ *
+ * vim:ts=4:sw=4:expandtab
+ */
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <unistd.h>
+#include <stdbool.h>
+#include <stdint.h>
+#include <errno.h>
+
+
+/* https://docs.openssl.org/master/man3/EVP_DigestInit/ */
+#include <openssl/evp.h>
+
+#include "file_processor.h"
+#include "trace_macros.h"
+
+
+/*=========== DEFINES, CONSTANTS AND TYPES ===========*/
+
+#ifndef BUFSIZE4MIB
+/* Default read buffer size: 4 MiB (4 * 1024 * 1024 bytes). May be overridden at compile time.
+ * Personal observation: dd bs=4M usually gets good performance regardless of SSD/HDD or USB 2.x/3.x */
+#define BUFSIZE4MIB 4194304
+#endif
+
+
+/**
+ * Objects from the OpenSSL library.
+ * Bundles one message digest context per hash algorithm, so the contexts can
+ * be created and released together outside the file handling code, which
+ * keeps that code easier to read.
+ */
+struct df_md_components {
+ /* Digest context used for the BLAKE2 hash */
+ EVP_MD_CTX *mdctx_blake2;
+ /* Digest context used for the SHA256 hash */
+ EVP_MD_CTX *mdctx_sha256;
+ /* Digest context used for the SHA512 hash */
+ EVP_MD_CTX *mdctx_sha512;
+};
+
+
+/*=========== GLOBAL VARIABLES ===========*/
+
+/** Number of bytes process_file() requests per fread() call; defaults to BUFSIZE4MIB.
+ * TODO: Should be set via a command line parameter; move this to the argument
+ * handling once that is implemented.
+ */
+size_t glbl_bufsize = BUFSIZE4MIB;
+
+/* Digest algorithm descriptors shared by all contexts. They start out NULL and
+ * are filled in by init_md_components() the first time it runs. */
+const EVP_MD *glbl_md_blake2 = NULL;
+const EVP_MD *glbl_md_sha256 = NULL;
+const EVP_MD *glbl_md_sha512 = NULL;
+
+
+
+/*=========== FUNCTIONS ===========*/
+/* Forward declarations of the file-local helpers defined below.
+ * (void) makes the first one a real prototype: an empty parameter list would
+ * leave the arguments unchecked in C versions before C23. */
+struct df_md_components *init_md_components(void);
+void destroy_md_components(struct df_md_components *pkg);
+
+
+/**
+ * Prepare a df_md_components struct for active usage.
+ *
+ * Allocates the struct, fetches the three digest algorithm descriptors into
+ * the glbl_md_* variables if they are still NULL, then creates and
+ * initializes one digest context per algorithm.
+ *
+ * @return pointer to the ready-to-use struct; the caller releases it with
+ *         destroy_md_components().
+ *         NULL on any failure; everything allocated up to that point has
+ *         been released again.
+ */
+inline struct df_md_components *init_md_components(void) {
+    struct df_md_components *pkg = calloc(1, sizeof(*pkg));
+
+    if (pkg == NULL) {
+        LOGERR("ERROR: Failed to allocate heap memory for a struct df_md_components errno %d: %s\n",
+                errno, strerror(errno));
+        return NULL;
+    }
+
+    /* TODO: research whether the performance difference is really worth this additional
+     * complexity. The util processes files from storage in a single thread, not tons of
+     * <1KiB blobs in parallel, so a few nano- or microseconds are mostly insignificant
+     * compared to the I/O throttle. */
+    if (glbl_md_blake2 == NULL && (glbl_md_blake2 = EVP_blake2b512()) == NULL) {
+        LOGERR("ERROR: Failed to fetch EVP_MD for BLAKE2\n");
+        goto fail;
+    }
+    if (glbl_md_sha256 == NULL && (glbl_md_sha256 = EVP_sha256()) == NULL) {
+        LOGERR("ERROR: Failed to fetch EVP_MD for SHA2_256\n");
+        goto fail;
+    }
+    if (glbl_md_sha512 == NULL && (glbl_md_sha512 = EVP_sha512()) == NULL) {
+        LOGERR("ERROR: Failed to fetch EVP_MD for SHA2_512\n");
+        goto fail;
+    }
+
+    /* Create the contexts. calloc() left all three members NULL, so the shared
+     * failure path can release a partially filled struct safely. */
+    if ((pkg->mdctx_blake2 = EVP_MD_CTX_new()) == NULL) {
+        LOGERR("ERROR: Failed to create context for BLAKE2\n");
+        goto fail;
+    }
+    if ((pkg->mdctx_sha256 = EVP_MD_CTX_new()) == NULL) {
+        LOGERR("ERROR: Failed to create context for SHA256\n");
+        goto fail;
+    }
+    if ((pkg->mdctx_sha512 = EVP_MD_CTX_new()) == NULL) {
+        LOGERR("ERROR: Failed to create context for SHA512\n");
+        goto fail;
+    }
+
+    /* Initialize them */
+    if (EVP_DigestInit_ex2(pkg->mdctx_blake2, glbl_md_blake2, NULL) != 1) {
+        LOGERR("ERROR: Failed to initialize BLAKE2 context\n");
+        goto fail;
+    }
+    if (EVP_DigestInit_ex2(pkg->mdctx_sha256, glbl_md_sha256, NULL) != 1) {
+        LOGERR("ERROR: Failed to initialize SHA256 context\n");
+        goto fail;
+    }
+    if (EVP_DigestInit_ex2(pkg->mdctx_sha512, glbl_md_sha512, NULL) != 1) {
+        LOGERR("ERROR: Failed to initialize SHA512 context\n");
+        goto fail;
+    }
+
+    return pkg;
+
+fail:
+    /* Single exit for every error after the allocation: frees whichever
+     * contexts exist and the struct itself, so no path leaks pkg. */
+    destroy_md_components(pkg);
+    return NULL;
+}
+
+/**
+ * Release a df_md_components struct: first the digest contexts it holds,
+ * then the struct itself.
+ * @param pkg the struct to destroy; passing NULL is allowed and does nothing
+ */
+inline void destroy_md_components(struct df_md_components *pkg) {
+    if (pkg != NULL) {
+        /* EVP_MD_CTX_free() does nothing when handed NULL, so members that
+         * were never created need no individual check. */
+        EVP_MD_CTX_free(pkg->mdctx_blake2);
+        EVP_MD_CTX_free(pkg->mdctx_sha256);
+        EVP_MD_CTX_free(pkg->mdctx_sha512);
+        free(pkg);
+    }
+}
+
+
+/**
+ * Read the file defined by path and name in the argument struct, and add stat
+ * results and binary represented hashes of the file into the struct.
+ *
+ * The full path is built as "<path>/<name>". The lstat() result is stored in
+ * info->statbuf, and the BLAKE2, SHA256 and SHA512 digests are copied into
+ * info->blake2, info->sha256 and info->sha512. Only regular files are
+ * processed; because lstat() is used, a symbolic link is rejected rather
+ * than followed.
+ *
+ * The file is read in chunks of glbl_bufsize bytes through a heap buffer.
+ * The buffer, the stream and the digest contexts are released on every
+ * return path.
+ *
+ * @param info struct contains the path of the file to read, results will be
+ *             stored there. On failure it may already hold the stat result
+ *             and some of the digests.
+ *
+ * @return 0 on success
+ *         -1 on failure
+ */
+int process_file(struct df_fileinfo *info) {
+    int rc = -1;
+    FILE *fdin = NULL;
+    unsigned char *buffer = NULL;
+    struct df_md_components *ctx_pkg = NULL;
+    /* Snapshot the global so allocation and every fread() agree on the size. */
+    const size_t bufsize = glbl_bufsize;
+    char fullpath[4096];
+    int pathlen;
+    size_t bytes_read;
+    int read_errno;
+
+    unsigned char md_val[EVP_MAX_MD_SIZE];
+    unsigned int md_len = 0;
+
+    if (info == NULL || info->name == NULL || info->path == NULL ||
+            info->name[0] == '\0' || info->path[0] == '\0') {
+        LOGERR("ERROR: Not enough information to construct a full path.\n");
+        return -1;
+    }
+
+    /* A zero sized buffer would make fread() return 0 forever without ever
+     * reaching end-of-file, so the read loop below would never terminate. */
+    if (bufsize == 0) {
+        LOGERR("ERROR: Read buffer size must not be zero.\n");
+        return -1;
+    }
+
+    pathlen = snprintf(fullpath, sizeof(fullpath), "%s/%s", info->path, info->name);
+    if (pathlen < 0) {
+        LOGERR("ERROR: Failed to print fullpath string into stack memory: %s (errno %d)\n",
+                strerror(errno), errno);
+        return -1;
+    }
+    /* snprintf() returns the length it wanted to write; a value that does not
+     * fit means the path was cut off and would name a different file. */
+    if ((size_t)pathlen >= sizeof(fullpath)) {
+        LOGERR("ERROR: Full path '%s/%s' is too long.\n", info->path, info->name);
+        return -1;
+    }
+
+    if (lstat(fullpath, &(info->statbuf)) == -1) {
+        LOGERR("ERROR: lstat() call failed for file %s: %s (errno %d)\n",
+                fullpath, strerror(errno), errno);
+        return -1;
+    }
+
+    if ((info->statbuf.st_mode & S_IFMT) != S_IFREG) {
+        LOGERR("ERROR: Non-regular files are not processed.\n");
+        return -1;
+    }
+
+    /* The buffer is several MiB by default, too large to place on the stack. */
+    if ((buffer = malloc(bufsize)) == NULL) {
+        LOGERR("ERROR: Failed to allocate %zu bytes for the read buffer: %s (errno %d)\n",
+                bufsize, strerror(errno), errno);
+        return -1;
+    }
+
+    if ((ctx_pkg = init_md_components()) == NULL) {
+        LOGERR("ERROR: Failed to initialize/create md contexts to be used with %s\n",
+                fullpath);
+        goto cleanup;
+    }
+
+    if ((fdin = fopen(fullpath, "rb")) == NULL) {
+        LOGERR("ERROR: Failed to open file '%s' %s\n", fullpath, strerror(errno));
+        goto cleanup;
+    }
+
+    for (;;) {
+        bytes_read = fread(buffer, sizeof(unsigned char), bufsize, fdin);
+        /* Keep errno from the read itself; the digest calls may overwrite it. */
+        read_errno = errno;
+
+        if (bytes_read > 0) {
+            if (EVP_DigestUpdate(ctx_pkg->mdctx_blake2, buffer, bytes_read) != 1) {
+                LOGERR("ERROR: Failed to update message digest BLAKE2 of file '%s'\n", fullpath);
+                goto cleanup;
+            }
+            if (EVP_DigestUpdate(ctx_pkg->mdctx_sha256, buffer, bytes_read) != 1) {
+                LOGERR("ERROR: Failed to update message digest SHA256 of file '%s'\n", fullpath);
+                goto cleanup;
+            }
+            if (EVP_DigestUpdate(ctx_pkg->mdctx_sha512, buffer, bytes_read) != 1) {
+                LOGERR("ERROR: Failed to update message digest SHA512 of file '%s'\n", fullpath);
+                goto cleanup;
+            }
+        }
+
+        /* A short read means end-of-file or an error; tell them apart. */
+        if (bytes_read != bufsize) {
+            if (ferror(fdin) != 0) {
+                LOGERR("ERROR: Failed to read from %s: %s (errno %d)\n",
+                        fullpath, strerror(read_errno), read_errno);
+                goto cleanup;
+            }
+            if (feof(fdin) != 0) {
+                break;
+            }
+        }
+    }
+
+    if (EVP_DigestFinal_ex(ctx_pkg->mdctx_blake2, md_val, &md_len) != 1) {
+        LOGERR("ERROR: Failed to finalize MD BLAKE2 of file '%s'\n", fullpath);
+        goto cleanup;
+    }
+    memcpy(info->blake2, md_val, md_len);
+
+    if (EVP_DigestFinal_ex(ctx_pkg->mdctx_sha256, md_val, &md_len) != 1) {
+        LOGERR("ERROR: Failed to finalize MD SHA256 of file '%s'\n", fullpath);
+        goto cleanup;
+    }
+    memcpy(info->sha256, md_val, md_len);
+
+    if (EVP_DigestFinal_ex(ctx_pkg->mdctx_sha512, md_val, &md_len) != 1) {
+        LOGERR("ERROR: Failed to finalize MD SHA512 of file '%s'\n", fullpath);
+        goto cleanup;
+    }
+    memcpy(info->sha512, md_val, md_len);
+
+    rc = 0;
+
+cleanup:
+    /* Shared exit for success and failure, so the digest contexts are
+     * released after a successful run as well. */
+    if (fdin != NULL) {
+        fclose(fdin);
+    }
+    free(buffer);
+    destroy_md_components(ctx_pkg);
+    return rc;
+}
+
+