From fba3371d4c82c95433ac1f09c1b498eaa8dd67d0 Mon Sep 17 00:00:00 2001 From: Thorsten Töpper Date: Sat, 30 Aug 2025 18:12:28 +0200 Subject: split_for_sort: handle filename - as stdin --- src/split_for_sort.c | 132 ++++++++++++++++++++++++++++++++++++++++++++++++--- 1 file changed, 125 insertions(+), 7 deletions(-) diff --git a/src/split_for_sort.c b/src/split_for_sort.c index 5678f6d..68b7819 100644 --- a/src/split_for_sort.c +++ b/src/split_for_sort.c @@ -45,6 +45,7 @@ struct list_head { int push_into_list_unique(struct list_head *list, char *name); int split_into_files(struct list_head *list, char *inputfile, char *prefix); size_t list_check_length(struct list_head *list); +struct list_head *stdin_handling(struct list_head *list, size_t splitlength, char *prefix); struct list_head *extend_list(struct list_head *list, size_t splitlength, char *fname); struct list_node *get_node(struct list_head *list, char *name); void destroy_list(struct list_head *list); @@ -373,11 +374,116 @@ int split_into_files(struct list_head *list, char *inputfile, char *prefix) { return 0; } +/* This covers extend_list and split_into_files for the stdin stream. + * The other two functions can't be used as extend_list would read + * the data from stdin and split_into_files would have nothing to work with. + */ +struct list_head *stdin_handling(struct list_head *list, size_t splitlength, char *prefix) { + FILE *fdin = stdin; + char *filename = NULL, *line = NULL, *line_head = NULL, *previous = NULL; + size_t i=0, len = 0; + struct list_node *node = NULL; + + if ((line = calloc(BUFFERSIZE, sizeof(char))) == NULL) { + LOGERR("ERROR: Failed to allocate memory for read buffer\n"); + return NULL; + } + if ((line_head = calloc(BUFFERSIZE, sizeof(char))) == NULL) { + LOGERR("ERROR: Failed to allocate memory for read buffer\n"); + free(line); + return NULL; + } + + if ((previous = calloc(BUFFERSIZE, sizeof(char))) == NULL) { + LOGERR("ERROR: Failed to allocate memory for read buffer\n"); + free(line); + free(line_head); + return NULL; + } + if ((filename = calloc(BUFFERSIZE, sizeof(char))) == NULL) { + LOGERR("ERROR: Failed to allocate memory for input buffer.\n"); + free(line); + free(line_head); + free(previous); + return NULL; + } + + if ((list == NULL) && \ + ((list = calloc(1, sizeof(struct list_head))) == NULL)) { + LOGERR("ERROR: Failed to create new list"); + free(line); + free(line_head); + free(previous); + free(filename); + return NULL; + } + list->splitlength = splitlength; + + while (fgets(line, BUFFERSIZE, fdin) != NULL) { + len = strlen(line); + len = (splitlength < len) ? splitlength : len; + /* most simple way to stick with FS compatible characters */ + for (i=0; ifd == NULL) { + snprintf(filename, BUFFERSIZE, "%s%s", ((prefix==NULL)?"":prefix), node->name); + if ((node->fd = fopen(filename, (option_append_mode)?"a":"w")) == NULL) { + LOGERR("ERROR: Failed to open file '%s': %s\n", filename, strerror(errno)); + free(line); + free(line_head); + free(previous); + free(filename); + destroy_list(list); + return NULL; + } + } + strncpy(previous, line_head, splitlength); + } + + if (fputs(line, node->fd) == EOF) { + LOGERR("ERROR: Failed to write into output file for '%s': %s\n", + line_head, strerror(errno)); + free(line); + free(line_head); + free(previous); + free(filename); + destroy_list(list); + return NULL; + } + } + + free(line); + free(line_head); + free(previous); + free(filename); + + return list; +} int main(int argc, char **argv) { int data_index = 2, length_index = 1, opt = 0, output_index = 0; size_t splitlength = 0; - struct list_head *list = NULL; + struct list_head *list = NULL, *lsttmp = NULL; if (argc < 4) { fprintf(stderr, "Usage: %s prefix length files...\n", argv[0]); @@ -411,12 +517,24 @@ int main(int argc, char **argv) { } for (; data_index Scan '%s' for bucket filenames\n", argv[i]); - list = extend_list(list, splitlength, argv[data_index]); -// fprintf(stderr,"=> Split '%s' into bucket files\n", argv[i]); - if (split_into_files(list, argv[data_index], argv[output_index]) < 0) { - destroy_list(list); - return EXIT_FAILURE; + if (argv[data_index][0] == '-' && argv[data_index][1] == '\0') { + lsttmp = stdin_handling(list, splitlength, argv[output_index]); + if (lsttmp == NULL) { + destroy_list(list); + return EXIT_FAILURE; + } + list = lsttmp; + } else { + lsttmp = extend_list(list, splitlength, argv[data_index]); + if (lsttmp == NULL) { + destroy_list(list); + return EXIT_FAILURE; + } + list = lsttmp; + if (split_into_files(list, argv[data_index], argv[output_index]) < 0) { + destroy_list(list); + return EXIT_FAILURE; + } } } -- cgit v1.2.3-70-g09d2