aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorThorsten Töpper <atsutane@freethoughts.de>2025-09-10 21:23:39 +0200
committerThorsten Töpper <atsutane@freethoughts.de>2025-09-10 21:23:39 +0200
commit3d4e2fc261d3471eac5e68c5905ab5980f0ff89f (patch)
treea28e3e8a6e58a65dd88e652a502a4b16b5ed6405
parentc4e08ec3777b78b6f788eec6737524cc5e856107 (diff)
downloadsmall-utils-3d4e2fc261d3471eac5e68c5905ab5980f0ff89f.tar.gz
small-utils-3d4e2fc261d3471eac5e68c5905ab5980f0ff89f.tar.bz2
custom_uniq: uniq -u buggy? workaround
-rw-r--r--Makefile8
-rw-r--r--src/custom_uniq.c82
2 files changed, 90 insertions, 0 deletions
diff --git a/Makefile b/Makefile
index 3deee66..b805715 100644
--- a/Makefile
+++ b/Makefile
@@ -12,12 +12,14 @@ CFLAGS += -I include
all: \
+ out/custom_uniq \
out/mem_internal_check \
out/split_for_sort \
out/tree_based_check
debug: \
+ out/debug/custom_uniq \
out/debug/mem_internal_check \
out/debug/split_for_sort \
out/debug/tree_based_check
@@ -38,6 +40,12 @@ out/debug: out
# Lazy, not splitting compiler & linker calls
+out/custom_uniq: out src/custom_uniq.c include/trace_macros.h
+ ${CC} -o $@ ${CFLAGS} ${PROD_CFLAGS} src/custom_uniq.c
+
+out/debug/custom_uniq: out/debug src/custom_uniq.c include/trace_macros.h
+ ${CC} -o $@ ${CFLAGS} ${DEBUG_CFLAGS} src/custom_uniq.c
+
out/mem_internal_check: out src/mem_internal_check.c \
include/trace_macros.h include/hex_conversion.h
${CC} -o $@ ${CFLAGS} ${PROD_CFLAGS} src/mem_internal_check.c
diff --git a/src/custom_uniq.c b/src/custom_uniq.c
new file mode 100644
index 0000000..82ce3da
--- /dev/null
+++ b/src/custom_uniq.c
@@ -0,0 +1,82 @@
+/* SPDX-License-Identifier: Apache-2.0 */
+
+/* Copyright 2025 Thorsten Töpper
+ *
+ * I noticed that GNU uniq -u did not always remove all overlaps. Instead
+ * of spending hours on creating a reproducible test case and doing a code
+ * review, this is a simple custom implementation of that mode.
+ *
+ * vim:ts=4:sw=4:expandtab
+ */
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <unistd.h>
+#include <stdbool.h>
+#include <stdint.h>
+#include <errno.h>
+
+#include "trace_macros.h"
+
+/* Capacity in bytes of each of the two line buffers, including the
+ * terminating NUL; it is also the size limit handed to fgets(). */
+#define LINE_LENGTH 4096
+
+/* Flip the two buffer indices between 0 and 1. Expects int variables named
+ * `cur` and `prev` in the calling scope. Wrapped in do { } while (0) so the
+ * macro acts as a single statement, also under an unbraced if/else.
+ * Open question kept from the original: prev=(cur)?0:1; less cycles? */
+#define INDEXSWITCH do { cur = (cur + 1) % 2; prev = (prev + 1) % 2; } while (0)
+
+/* Read the next line from `in` into `buf` (capacity LINE_LENGTH).
+ *
+ * Returns true when something was read, false on end of input or on a read
+ * error (the caller tells the two apart with ferror()).
+ *
+ * If the very last line of the input lacks a trailing newline, one is
+ * appended when the buffer has room for it, so that this line compares equal
+ * to an otherwise identical preceding line.
+ *
+ * Limitation: fgets() stores at most LINE_LENGTH-1 characters per call, so a
+ * longer input line is returned in several pieces and each piece is treated
+ * as a line of its own by the caller.
+ */
+static bool read_line(char *buf, FILE *in) {
+    size_t len;
+
+    if (fgets(buf, LINE_LENGTH, in) == NULL) {
+        return false;
+    }
+
+    len = strlen(buf);
+    if (len > 0 && buf[len - 1] != '\n' && feof(in) && len + 1 < LINE_LENGTH) {
+        buf[len] = '\n';
+        buf[len + 1] = '\0';
+    }
+
+    return true;
+}
+
+/* Print only those lines that are not directly followed or preceded by an
+ * identical line (the behaviour of `uniq -u`).
+ *
+ * Input is read from the file named by argv[1]; without an argument, or when
+ * the argument is "-", it is read from stdin. Output goes to stdout.
+ *
+ * Returns EXIT_SUCCESS on success (including empty input) and EXIT_FAILURE
+ * when the input cannot be opened or read, memory cannot be allocated, or
+ * the output cannot be written.
+ */
+int main(int argc, char **argv) {
+    FILE *fdin = stdin;
+    char *lines[2] = { NULL, NULL };
+    int cur = 1, prev = 0;
+    bool duplicate = false;
+    int rc = EXIT_FAILURE;
+
+    if (argc > 1 && strcmp(argv[1], "-") != 0) {
+        fdin = fopen(argv[1], "r");
+        if (fdin == NULL) {
+            LOGERR("ERROR: Failed to open file %s: %s\n",
+                    argv[1], strerror(errno));
+            return EXIT_FAILURE;
+        }
+    }
+
+    /* Two buffers: lines[prev] holds the pending candidate line,
+     * lines[cur] receives the line that was just read. */
+    lines[0] = calloc(LINE_LENGTH, sizeof(char));
+    lines[1] = calloc(LINE_LENGTH, sizeof(char));
+    if (lines[0] == NULL || lines[1] == NULL) {
+        LOGERR("ERROR: Failed to allocate 8kiB...\n");
+        goto cleanup;
+    }
+
+    /* preparation: load the first candidate line */
+    if ( ! read_line(lines[prev], fdin) ) {
+        if (ferror(fdin)) {
+            LOGERR("ERROR: Failed to read input line.\n");
+        } else {
+            /* Empty input is not an error, there is just nothing to print. */
+            rc = EXIT_SUCCESS;
+        }
+        goto cleanup;
+    }
+
+    /* parsing input */
+    while (read_line(lines[cur], fdin)) {
+        if (strcmp(lines[cur], lines[prev]) == 0) {
+            /* Same as the candidate: remember that and reuse lines[cur]. */
+            duplicate = true;
+            continue;
+        }
+
+        /* A different line ends the current run; the candidate is printed
+         * only if it was never repeated. */
+        if ( ! duplicate ) {
+            fputs(lines[prev], stdout);
+        }
+        duplicate = false;
+
+        /* The line just read becomes the new candidate. */
+        INDEXSWITCH;
+    }
+
+    /* The loop ends on EOF and on read errors; only EOF is a success. */
+    if (ferror(fdin)) {
+        LOGERR("ERROR: Failed to read input line.\n");
+        goto cleanup;
+    }
+
+    /* last line */
+    if ( ! duplicate ) {
+        fputs(lines[prev], stdout);
+    }
+
+    /* Flush explicitly so that a failed write is reported via the exit code. */
+    if (fflush(stdout) == EOF) {
+        LOGERR("ERROR: Failed to write output: %s\n", strerror(errno));
+        goto cleanup;
+    }
+
+    rc = EXIT_SUCCESS;
+
+cleanup:
+    free(lines[0]);
+    free(lines[1]);
+    if (fdin != stdin) {
+        fclose(fdin);
+    }
+
+    return rc;
+}
+