aboutsummaryrefslogtreecommitdiff
path: root/include
diff options
context:
space:
mode:
authorThorsten Töpper <atsutane@freethoughts.de>2026-02-07 21:43:17 +0100
committerThorsten Töpper <atsutane@freethoughts.de>2026-02-07 21:43:17 +0100
commiteed2d1323441861f2d41f0ecc0a72fcc9190fa5f (patch)
tree779cd7c1768504308e9957cfbc5cfc271e89f1c5 /include
parentb7d09007d04c3b7c38848dd05d6105f3354b6b15 (diff)
downloadduplicate_finder-eed2d1323441861f2d41f0ecc0a72fcc9190fa5f.tar.gz
duplicate_finder-eed2d1323441861f2d41f0ecc0a72fcc9190fa5f.tar.bz2
file processor: Copied from my small-utils project
Diffstat (limited to 'include')
-rw-r--r--include/file_processor.h41
-rw-r--r--include/hex_conversion.h113
-rw-r--r--include/trace_macros.h23
3 files changed, 177 insertions, 0 deletions
diff --git a/include/file_processor.h b/include/file_processor.h
new file mode 100644
index 0000000..8cfb6de
--- /dev/null
+++ b/include/file_processor.h
@@ -0,0 +1,41 @@
+/* SPDX-License-Identifier: Apache-2.0 */
+
+/* Copyright 2026 Thorsten Töpper
+ *
+ * vim:ts=4:sw=4:expandtab
+ */
+#ifndef FILE_PROCESSOR_H
+#define FILE_PROCESSOR_H
+
+#include <sys/stat.h>
+
+#define DF_BYTE_SIZE_256 32 /* number of bytes in a 256 bit digest */
+#define DF_BYTE_SIZE_512 64 /* number of bytes in a 512 bit digest */
+
+/* Convenience aliases for the algorithm name strings. Currently all of these
+ * algorithms are part of the default provider.
+ * NOTE(review): the OSSL prefix suggests the strings are passed to OpenSSL's
+ * digest lookup; confirm against the implementation file. */
+#define DF_OSSL_BLAKE2 "BLAKE2B-512"
+#define DF_OSSL_SHA256 "SHA2-256"
+#define DF_OSSL_SHA512 "SHA2-512"
+
+/**
+ * Information about a single file.
+ * Holds the file's path and name, one binary digest per hash algorithm and
+ * the file's stat data.
+ * TODO: Organize the paths in a global pool (list/tree/map) and only refer to
+ * entries there, so that no free() call is triggered through the pointers in
+ * this struct.
+ */
+struct df_fileinfo {
+ char *path; /**< Pointer to the path of the file. */
+ char *name; /**< Pointer to the name of the file. */
+ unsigned char blake2[DF_BYTE_SIZE_512]; /**< The BLAKE2-512 hash in binary form (DF_BYTE_SIZE_512 bytes). */
+ unsigned char sha256[DF_BYTE_SIZE_256]; /**< The SHA256 hash in binary form (DF_BYTE_SIZE_256 bytes). */
+ unsigned char sha512[DF_BYTE_SIZE_512]; /**< The SHA512 hash in binary form (DF_BYTE_SIZE_512 bytes). */
+ struct stat statbuf; /**< Result of the lstat() call. Symlinks are to be ignored and filtered out earlier. */
+};
+
+
+/*=========== FUNCTIONS ===========
+ * process_file() receives a pointer to the df_fileinfo record to work on and
+ * reports its outcome as an int.
+ * NOTE(review): the implementation is not part of this header; presumably it
+ * fills the stat data and the digest fields of *info and returns 0 on
+ * success. Confirm against the implementation file.
+ */
+int process_file(struct df_fileinfo *info);
+
+#endif
+
diff --git a/include/hex_conversion.h b/include/hex_conversion.h
new file mode 100644
index 0000000..90ab9e4
--- /dev/null
+++ b/include/hex_conversion.h
@@ -0,0 +1,113 @@
+/* SPDX-License-Identifier: Apache-2.0 */
+
+/* Copyright 2026 Thorsten Töpper
+ *
+ * vim:ts=4:sw=4:expandtab
+ */
+#ifndef HEX_CONVERSION_H
+#define HEX_CONVERSION_H
+
+#include <stdlib.h>
+#include <string.h>
+#include <ctype.h>
+
+#ifdef DEBUGBUILD
+#include "trace_macros.h"
+#endif
+
+/**
+ * Evaluates to non-zero when c is a hexadecimal digit (0-9, A-F, a-f).
+ * The argument is evaluated several times, so it must not have side effects;
+ * use ishex() when that cannot be guaranteed.
+ */
+#define ishex_macro(c) (((c) >= '0' && (c) <= '9') || ((c) >= 'A' && (c) <= 'F') || ((c) >= 'a' && (c) <= 'f'))
+
+/* The helpers are defined in this header, so they need internal linkage.
+ * A plain prototype followed by an 'inline' definition yields an ordinary
+ * external definition in every translation unit that includes the header,
+ * which ends in duplicate symbols at link time. */
+static inline int convert_line(char *s);
+static inline int ishex(unsigned char c);
+static inline int ishex_string(const char *s, size_t l);
+static inline unsigned char *convert_to_binary(const char *hex, unsigned char *out);
+static inline char *convert_from_binary(const unsigned char *bin, size_t l, char *out);
+
+/**
+ * Upper-cases a line in place and cuts off its line ending.
+ * The string is terminated at the first '\r' found among its last two
+ * characters, or at a '\n' that is its last character.
+ *
+ * @param s NUL-terminated string to modify, may be NULL.
+ * @return 0 on success, -1 when s is NULL.
+ */
+static inline int convert_line(char *s) {
+    size_t i, l;
+
+    if (s == NULL)
+        return -1;
+    l = strlen(s);
+    for (i = 0; i < l; i++) {
+        /* toupper() is only defined for unsigned char values and EOF, a
+         * plain char may be negative. */
+        s[i] = (char)toupper((unsigned char)s[i]);
+        if ((s[i] == '\r' && (l - i < 3)) || (s[i] == '\n' && i == l - 1)) {
+            s[i] = '\0';
+            break;
+        }
+    }
+    return 0;
+}
+
+/**
+ * Function form of ishex_macro().
+ *
+ * @param c character to test.
+ * @return 1 when c is a hexadecimal digit, 0 otherwise.
+ */
+static inline int ishex(unsigned char c) {
+    return ishex_macro(c) ? 1 : 0;
+}
+
+/**
+ * Checks that a string consists of hexadecimal digits only.
+ *
+ * @param s string to check, may be NULL.
+ * @param l number of characters to check; 0 means the whole string as
+ *          measured by strlen().
+ * @return 1 when every checked character is a hexadecimal digit (this
+ *         includes the empty string), 0 otherwise or when s is NULL.
+ */
+static inline int ishex_string(const char *s, size_t l) {
+    size_t i;
+
+    if (s == NULL)
+        return 0;
+    if (l == 0)
+        l = strlen(s);
+    for (i = 0; i < l; i++) {
+        if (!ishex_macro(s[i]))
+            return 0;
+    }
+    return 1;
+}
+
+/**
+ * Converts a string of hexadecimal digits into its binary representation.
+ * The input is validated completely before anything is allocated or written.
+ *
+ * @param hex NUL-terminated string with an even, non-zero number of
+ *            hexadecimal digits.
+ * @param out buffer for strlen(hex)/2 bytes, or NULL to have the function
+ *            allocate one; an allocated buffer has to be released by the
+ *            caller with free().
+ * @return the buffer holding the result, NULL when hex is NULL, empty, of odd
+ *         length, contains a non-hex character, or when the allocation fails.
+ */
+static inline unsigned char *convert_to_binary(const char *hex, unsigned char *out) {
+    char tmp[3] = {0, 0, 0};
+    size_t length, i;
+
+    if (hex == NULL)
+        return NULL;
+    length = strlen(hex);
+    if ((length == 0) || (length % 2 == 1))
+        return NULL;
+    for (i = 0; i < length; i++) {
+        if (!ishex_macro(hex[i])) {
+#ifdef DEBUGBUILD
+            LOGERR("Incompatible string '%s'\n", hex);
+#endif
+            return NULL;
+        }
+    }
+    if (out == NULL && ((out = calloc(length / 2, sizeof *out)) == NULL)) {
+#ifdef DEBUGBUILD
+        LOGERR("ERROR: Failed to allocate %zu bytes\n", length / 2);
+#endif
+        return NULL;
+    }
+    /* Every pair of digits becomes one byte; tmp stays NUL-terminated. */
+    for (i = 0; i < length; i += 2) {
+        tmp[0] = hex[i];
+        tmp[1] = hex[i + 1];
+        out[i / 2] = (unsigned char)strtol(tmp, NULL, 16);
+    }
+    return out;
+}
+
+/**
+ * Converts binary data into upper-case hexadecimal digits.
+ * Each byte is split into its two nibbles which index a digit table; this
+ * needs neither <stdio.h> nor a scratch buffer.
+ *
+ * @param bin data to convert.
+ * @param l   number of bytes in bin.
+ * @param out buffer for 2*l characters, or NULL to have the function allocate
+ *            a zeroed buffer of 2*l+1 bytes that the caller has to free().
+ *            Only the 2*l digits are written: an allocated buffer is
+ *            therefore NUL-terminated, a caller supplied one is not
+ *            terminated by this function.
+ * @return the buffer holding the digits, NULL when bin is NULL, l is 0, the
+ *         result size would overflow size_t, or when the allocation fails.
+ */
+static inline char *convert_from_binary(const unsigned char *bin, size_t l, char *out) {
+    static const char digits[] = "0123456789ABCDEF";
+    size_t i;
+
+    if (bin == NULL || l == 0)
+        return NULL;
+    /* (l*2)+1 must be representable in size_t. */
+    if (l > (((size_t)-1) - 1) / 2)
+        return NULL;
+    if (out == NULL && (out = calloc((l * 2) + 1, sizeof *out)) == NULL) {
+#ifdef DEBUGBUILD
+        LOGERR("ERROR: Failed to allocate %zu bytes\n", (l * 2) + 1);
+#endif
+        return NULL;
+    }
+    for (i = 0; i < l; i++) {
+        out[i * 2] = digits[bin[i] >> 4];
+        out[(i * 2) + 1] = digits[bin[i] & 0x0F];
+    }
+    return out;
+}
+
+#endif
+
diff --git a/include/trace_macros.h b/include/trace_macros.h
new file mode 100644
index 0000000..b27dc40
--- /dev/null
+++ b/include/trace_macros.h
@@ -0,0 +1,23 @@
+/* SPDX-License-Identifier: Apache-2.0 */
+
+/* Copyright 2026 Thorsten Töpper
+ *
+ * vim:ts=4:sw=4:expandtab
+ */
+#ifndef TRACE_MACROS_H
+#define TRACE_MACROS_H
+
+#include <stdio.h>
+
+#ifndef LOGERR
+#define LOGERR(...) {fprintf(stderr, "[%s:%d] %s: ", __FILE__, __LINE__, __func__); fprintf(stderr, __VA_ARGS__);}
+#endif
+
+#ifdef DEBUGBUILD
+#define DBGTRC(...) LOGERR(__VA_ARGS__)
+#else
+#define DBGTRC(...)
+#endif
+
+#endif
+