diff options
| author | Thorsten Töpper <atsutane@freethoughts.de> | 2025-08-30 18:12:28 +0200 |
|---|---|---|
| committer | Thorsten Töpper <atsutane@freethoughts.de> | 2025-08-30 18:12:28 +0200 |
| commit | fba3371d4c82c95433ac1f09c1b498eaa8dd67d0 (patch) | |
| tree | 9fdbfa6a385c468aeeb33e23043b291a1ed2fe3a | |
| parent | ba83a4e306a267048ad2fe39e8cf9128e6bde245 (diff) | |
| download | small-utils-fba3371d4c82c95433ac1f09c1b498eaa8dd67d0.tar.gz small-utils-fba3371d4c82c95433ac1f09c1b498eaa8dd67d0.tar.bz2 | |
split_for_sort: handle filename - as stdin
| -rw-r--r-- | src/split_for_sort.c | 132 |
1 file changed, 125 insertions, 7 deletions
diff --git a/src/split_for_sort.c b/src/split_for_sort.c index 5678f6d..68b7819 100644 --- a/src/split_for_sort.c +++ b/src/split_for_sort.c @@ -45,6 +45,7 @@ struct list_head { int push_into_list_unique(struct list_head *list, char *name); int split_into_files(struct list_head *list, char *inputfile, char *prefix); size_t list_check_length(struct list_head *list); +struct list_head *stdin_handling(struct list_head *list, size_t splitlength, char *prefix); struct list_head *extend_list(struct list_head *list, size_t splitlength, char *fname); struct list_node *get_node(struct list_head *list, char *name); void destroy_list(struct list_head *list); @@ -373,11 +374,116 @@ int split_into_files(struct list_head *list, char *inputfile, char *prefix) { return 0; } +/* This covers extend_list and split_into_files for the stdin stream. + * The other two functions can't be used as extend_list would read + * the data from stdin and split_into_files would have nothing to work with. + */ +struct list_head *stdin_handling(struct list_head *list, size_t splitlength, char *prefix) { + FILE *fdin = stdin; + char *filename = NULL, *line = NULL, *line_head = NULL, *previous = NULL; + size_t i=0, len = 0; + struct list_node *node = NULL; + + if ((line = calloc(BUFFERSIZE, sizeof(char))) == NULL) { + LOGERR("ERROR: Failed to allocate memory for read buffer\n"); + return NULL; + } + if ((line_head = calloc(BUFFERSIZE, sizeof(char))) == NULL) { + LOGERR("ERROR: Failed to allocate memory for read buffer\n"); + free(line); + return NULL; + } + + if ((previous = calloc(BUFFERSIZE, sizeof(char))) == NULL) { + LOGERR("ERROR: Failed to allocate memory for read buffer\n"); + free(line); + free(line_head); + return NULL; + } + if ((filename = calloc(BUFFERSIZE, sizeof(char))) == NULL) { + LOGERR("ERROR: Failed to allocate memory for input buffer.\n"); + free(line); + free(line_head); + free(previous); + return NULL; + } + + if ((list == NULL) && \ + ((list = calloc(1, sizeof(struct list_head))) 
== NULL)) { + LOGERR("ERROR: Failed to create new list"); + free(line); + free(line_head); + free(previous); + free(filename); + return NULL; + } + list->splitlength = splitlength; + + while (fgets(line, BUFFERSIZE, fdin) != NULL) { + len = strlen(line); + len = (splitlength < len) ? splitlength : len; + /* most simple way to stick with FS compatible characters */ + for (i=0; i<len; i++) { + if (! isalnum(line[i]) ) { + line_head[i] = '_'; + } else { + line_head[i] = line[i]; + } + } + line_head[len] = '\0'; + + if (strncmp(line_head, previous, splitlength) != 0) { + if (push_into_list_unique(list, line_head) != 0) { + LOGERR("WARNING: Failed to insert '%s' into list. (file %s)\n", line, filename); + continue; + } + + node = get_node(list, line_head); + if (node == NULL) { + LOGERR("WARNING: no node found for %s - skip line\n", line_head); + continue; + } + + /* keep the file descriptors open across the input files to be split */ + if (node->fd == NULL) { + snprintf(filename, BUFFERSIZE, "%s%s", ((prefix==NULL)?"":prefix), node->name); + if ((node->fd = fopen(filename, (option_append_mode)?"a":"w")) == NULL) { + LOGERR("ERROR: Failed to open file '%s': %s\n", filename, strerror(errno)); + free(line); + free(line_head); + free(previous); + free(filename); + destroy_list(list); + return NULL; + } + } + strncpy(previous, line_head, splitlength); + } + + if (fputs(line, node->fd) == EOF) { + LOGERR("ERROR: Failed to write into output file for '%s': %s\n", + line_head, strerror(errno)); + free(line); + free(line_head); + free(previous); + free(filename); + destroy_list(list); + return NULL; + } + } + + free(line); + free(line_head); + free(previous); + free(filename); + + return list; +} int main(int argc, char **argv) { int data_index = 2, length_index = 1, opt = 0, output_index = 0; size_t splitlength = 0; - struct list_head *list = NULL; + struct list_head *list = NULL, *lsttmp = NULL; if (argc < 4) { fprintf(stderr, "Usage: %s prefix length files...\n", argv[0]); @@ 
-411,12 +517,24 @@ int main(int argc, char **argv) { } for (; data_index<argc; data_index++) { -// fprintf(stderr, "=> Scan '%s' for bucket filenames\n", argv[i]); - list = extend_list(list, splitlength, argv[data_index]); -// fprintf(stderr,"=> Split '%s' into bucket files\n", argv[i]); - if (split_into_files(list, argv[data_index], argv[output_index]) < 0) { - destroy_list(list); - return EXIT_FAILURE; + if (argv[data_index][0] == '-' && argv[data_index][1] == '\0') { + lsttmp = stdin_handling(list, splitlength, argv[output_index]); + if (lsttmp == NULL) { + destroy_list(list); + return EXIT_FAILURE; + } + list = lsttmp; + } else { + lsttmp = extend_list(list, splitlength, argv[data_index]); + if (lsttmp == NULL) { + destroy_list(list); + return EXIT_FAILURE; + } + list = lsttmp; + if (split_into_files(list, argv[data_index], argv[output_index]) < 0) { + destroy_list(list); + return EXIT_FAILURE; + } } } |
