diff options
| author | Thorsten Töpper <atsutane@freethoughts.de> | 2025-09-10 21:23:39 +0200 |
|---|---|---|
| committer | Thorsten Töpper <atsutane@freethoughts.de> | 2025-09-10 21:23:39 +0200 |
| commit | 3d4e2fc261d3471eac5e68c5905ab5980f0ff89f (patch) | |
| tree | a28e3e8a6e58a65dd88e652a502a4b16b5ed6405 | |
| parent | c4e08ec3777b78b6f788eec6737524cc5e856107 (diff) | |
| download | small-utils-3d4e2fc261d3471eac5e68c5905ab5980f0ff89f.tar.gz small-utils-3d4e2fc261d3471eac5e68c5905ab5980f0ff89f.tar.bz2 | |
custom_uniq: uniq -u buggy? workaround
| -rw-r--r-- | Makefile | 8 | ||||
| -rw-r--r-- | src/custom_uniq.c | 82 |
2 files changed, 90 insertions, 0 deletions
@@ -12,12 +12,14 @@ CFLAGS += -I include all: \ + out/custom_uniq \ out/mem_internal_check \ out/split_for_sort \ out/tree_based_check debug: \ + out/debug/custom_uniq \ out/debug/mem_internal_check \ out/debug/split_for_sort \ out/debug/tree_based_check @@ -38,6 +40,12 @@ out/debug: out # Lazy, not splitting compiler & linker calls +out/custom_uniq: out src/custom_uniq.c include/trace_macros.h + ${CC} -o $@ ${CFLAGS} ${PROD_CFLAGS} src/custom_uniq.c + +out/debug/custom_uniq: out/debug src/custom_uniq.c include/trace_macros.h + ${CC} -o $@ ${CFLAGS} ${DEBUG_CFLAGS} src/custom_uniq.c + out/mem_internal_check: out src/mem_internal_check.c \ include/trace_macros.h include/hex_conversion.h ${CC} -o $@ ${CFLAGS} ${PROD_CFLAGS} src/mem_internal_check.c diff --git a/src/custom_uniq.c b/src/custom_uniq.c new file mode 100644 index 0000000..82ce3da --- /dev/null +++ b/src/custom_uniq.c @@ -0,0 +1,82 @@ +/* SPDX-License-Identifier: Apache-2.0 */ + +/* Copyright 2025 Thorsten Töpper + * + * I noticed GNU uniq -u not always removed all overlaps, instead + * of spending hours in creating a reproducable test case and code + * review, a simple custom implementation of that mode. + * + * vim:ts=4:sw=4:expandtab + */ +#include <stdio.h> +#include <stdlib.h> +#include <string.h> +#include <unistd.h> +#include <stdbool.h> +#include <stdint.h> +#include <errno.h> + +#include "trace_macros.h" + +#define LINE_LENGTH 4096 + +/* prev=(cur)?0:1; less cycles? */ +#define INDEXSWITCH cur=(cur+1)%2; prev=(prev+1)%2; + +int main(int argc, char **argv) { + FILE *fdin = stdin; + char *lines[2]; + int cur = 1, prev = 0; + bool duplicate = false; + + if (argc > 1) { + if (strncmp(argv[1], "-", 2) != 0) { + if ((fdin=fopen(argv[1], "r")) == NULL) { + LOGERR("ERROR: Failed to open file %s: %s\n", + argv[1], strerror(errno)); + return EXIT_FAILURE; + } + } + } + + lines[0] = calloc(LINE_LENGTH, sizeof(char)); + lines[1] = calloc(LINE_LENGTH, sizeof(char)); + if (lines[0] == NULL || lines[1] == NULL) { + LOGERR("ERROR: Failed to allocate 8kiB...\n"); + return EXIT_FAILURE; + } + + /* preparation */ + if (fgets(lines[prev], LINE_LENGTH, fdin) == NULL) { + LOGERR("ERROR: Failed to read input line.\n"); + return EXIT_FAILURE; + } + + /* parsing input */ + while (fgets(lines[cur], LINE_LENGTH, fdin) != NULL) { + if (strcmp(lines[cur], lines[prev]) == 0) { + duplicate = true; + continue; + } + + if ( ! duplicate ) { + fputs(lines[prev], stdout); + } else { + duplicate = false; + } + + INDEXSWITCH; + } + + /* last line */ + if ( ! duplicate ) { + fputs(lines[prev], stdout); + } + + + if (fdin != stdin) + fclose(fdin); + + return EXIT_SUCCESS; +} + |
