From 9e2f3d59cf249403859916df9756c179753ea7e0 Mon Sep 17 00:00:00 2001
From: Thorsten Töpper <atsutane@freethoughts.de>
Date: Sun, 10 Aug 2025 18:16:07 +0200
Subject: split_for_sort: Split a given file into buckets

The target bucket is decided based on the first X characters of a line.
The bucket file names get a prefix given as an argument, and the smaller
buckets can be sorted faster on weak hardware. Note: This is just a
split alternative.

Real-world usage in a shell script, for a file in which the first 10
characters are equal in every line, so the following 2 bytes are
evaluated for splitting:

split_for_sort TMPSFS 12 raw_data.txt
for f in TMPSFS* ; do
    sort -o "${f}_sorted" -u "${f}"
done
\# Rely on the argument resolution to go with lexical order
cat TMPSFS*_sorted > sorted_data.txt
rm TMPSFS*
---
 include/output.h | 20 ++++++++++++++++++++
 1 file changed, 20 insertions(+)
 create mode 100644 include/output.h

(limited to 'include/output.h')

diff --git a/include/output.h b/include/output.h
new file mode 100644
index 0000000..efc7487
--- /dev/null
+++ b/include/output.h
@@ -0,0 +1,20 @@
+/*
+ * vim:ts=4:sw=4:expandtab
+ */
+#ifndef OUTPUT_H
+#define OUTPUT_H
+
+#include <stdio.h>
+
+#ifndef LOGERR /* LOGERR(fmt, ...): print "[file:line] function: " followed by a printf-style message to stderr; define LOGERR beforehand to override */
+#define LOGERR(...) do { fprintf(stderr, "[%s:%d] %s: ", __FILE__, __LINE__, __func__); fprintf(stderr, __VA_ARGS__); } while (0) /* do/while (0) makes the macro a single statement, so "if (c) LOGERR(...); else ..." compiles */
+#endif
+
+#ifndef DEBUGBUILD /* tracing disabled: DBGTRC() expands to nothing, so its arguments are never evaluated */
+#define DBGTRC(...)
+#else /* DEBUGBUILD defined: trace messages are forwarded to LOGERR */
+#define DBGTRC(...) LOGERR(__VA_ARGS__)
+#endif /* DEBUGBUILD */
+
+#endif
+
-- 
cgit v1.2.3-70-g09d2