From 9e2f3d59cf249403859916df9756c179753ea7e0 Mon Sep 17 00:00:00 2001 From: Thorsten Töpper Date: Sun, 10 Aug 2025 18:16:07 +0200 Subject: split_for_sort: Split a given file into buckets The target bucket is decided based on the first X characters of a line. The bucket name gets a prefix defined as an argument, and the buckets can be sorted faster on weak hardware. Note: This is just a split alternative. Real world usage in a shell script with a file in which the first 10 characters are equal in each line, the following 2 bytes are evaluated for splitting: split_for_sort TMPSFS 12 raw_data.txt for f in TMPSFS ; do sort -o "${f}_sorted" -u "${f}" done \# Rely on the argument resolution to go with lexical order cat TMPSFS*_sorted > sorted_data.txt rm TMPSFS* --- include/hex_conversion.h | 96 ++++++++++++++++++++++++++++++++++++++++++++++++ include/output.h | 20 ++++++++++ 2 files changed, 116 insertions(+) create mode 100644 include/hex_conversion.h create mode 100644 include/output.h (limited to 'include') diff --git a/include/hex_conversion.h b/include/hex_conversion.h new file mode 100644 index 0000000..b31216d --- /dev/null +++ b/include/hex_conversion.h @@ -0,0 +1,96 @@ +/* + * vim:ts=4:sw=4:expandtab + */ +#ifndef HEX_CONVERSION_H +#define HEX_CONVERSION_H + +#include +#include +#include + +#ifdef DEBUGBUILD +#include "output.h" +#endif + +#define ishex_macro(c) ((c>='0' && c <= '9') || (c>='A' && c <= 'F') || (c>='a' && c <= 'f')) + +int convert_line(char *s); +int ishex(char c); +char *convert_to_binary(char *hex, char *out); +char *convert_from_binary(char *bin, size_t l, char *out); + +/* short inline functions are fine in header */ +inline int convert_line(char *s) { + size_t i = 0, l = 0; + if (s == NULL) + return -1; + l=strlen(s); + for (i=0; i='0' && c <= '9') || (c>='A' && c <= 'F') || (c>='a' && c <= 'f')) { + return 1; + } + return 0; +}; + +inline char *convert_to_binary(char *hex, char *out) { + char tmp[3] = {0,0,0}; + size_t length, i; + if (hex == NULL) 
return NULL; + length=strlen(hex); + if ( (length==0) || (length%2 == 1)) return NULL; + for (i=0; i + +#ifndef LOGERR +#define LOGERR(...) {fprintf(stderr, "[%s:%d] %s: ", __FILE__, __LINE__, __func__); fprintf(stderr, __VA_ARGS__);} +#endif + +#ifdef DEBUGBUILD +#define DBGTRC(...) LOGERR(__VA_ARGS__) +#else +#define DBGTRC(...) +#endif + +#endif + -- cgit v1.2.3-70-g09d2