From 9e2f3d59cf249403859916df9756c179753ea7e0 Mon Sep 17 00:00:00 2001
From: Thorsten Töpper
Date: Sun, 10 Aug 2025 18:16:07 +0200
Subject: split_for_sort: Split a given file into buckets

The target bucket is decided based on the first X characters of a line.
The bucket name gets a prefix defined as argument and can be sorted faster
on weak hardware.

Note: This is just a split alternative.

Real world usage in a shell script with a file in which the first 10
characters are equal in each line, the following 2 bytes are evaluated
for splitting:

    split_for_sort TMPSFS 12 raw_data.txt
    for f in TMPSFS* ; do
        sort -o "${f}_sorted" -u "${f}"
    done
    # Rely on the argument resolution to go with lexical order
    cat TMPSFS*_sorted > sorted_data.txt
    rm TMPSFS*
---
 Makefile | 43 +++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 43 insertions(+)
 create mode 100644 Makefile

(limited to 'Makefile')

diff --git a/Makefile b/Makefile
new file mode 100644
index 0000000..4d8a55d
--- /dev/null
+++ b/Makefile
@@ -0,0 +1,43 @@
+# Build rules for split_for_sort.
+#
+# Targets:
+#   all (default)                   - production binary out/split_for_sort
+#   out/debug/split_for_sort_debug  - debug binary built with -fsanitize=address
+#   clean                           - remove the out/ directory
+
+# Warning and hardening flags shared by the production and the debug build.
+CFLAGS += -Wall -Wextra -Wformat -Wformat=2 -Wconversion -Wimplicit-fallthrough -Werror=format-security \
+	-Wbidi-chars=any -Werror=implicit -Werror=incompatible-pointer-types -Werror=int-conversion \
+	-fno-delete-null-pointer-checks -fstack-clash-protection -fstack-protector-strong -fstrict-flex-arrays=3
+
+DEBUG_CFLAGS = -DDEBUGBUILD -g -Werror -fsanitize=address
+
+# -fPIE/-pie build a position independent executable; the -Wl, options are
+# handed to the linker because compiling and linking happen in one ${CC} call.
+PROD_CFLAGS = -O2 -U_FORTIFY_SOURCE -D_FORTIFY_SOURCE=3 -fPIE -pie -Wl,-z,noexecstack -Wl,--as-needed -Wl,--no-copy-dt-needed-entries
+
+CFLAGS += -I include
+
+# all and clean name actions, not files.
+.PHONY: all clean
+
+all: out/split_for_sort
+
+# No prerequisites: the output directory must not be created just to be deleted.
+clean:
+	rm -rfv out
+
+out:
+	mkdir -p out
+
+out/debug:
+	mkdir -p out/debug
+
+# Lazy, not splitting compiler & linker calls.
+# The directories are order-only prerequisites (after the '|'): they have to
+# exist, but their changing timestamps must not trigger a rebuild.
+out/split_for_sort: src/split_for_sort.c include/output.h | out
+	${CC} -o $@ ${CFLAGS} ${PROD_CFLAGS} src/split_for_sort.c
+
+out/debug/split_for_sort_debug: src/split_for_sort.c include/output.h | out/debug
+	${CC} -o $@ ${CFLAGS} ${DEBUG_CFLAGS} src/split_for_sort.c
--
cgit v1.2.3-70-g09d2