diff options
| author | Thorsten Töpper <atsutane@freethoughts.de> | 2025-08-29 21:23:49 +0200 |
|---|---|---|
| committer | Thorsten Töpper <atsutane@freethoughts.de> | 2025-08-29 21:23:49 +0200 |
| commit | ba83a4e306a267048ad2fe39e8cf9128e6bde245 (patch) | |
| tree | 10b6c3a7771cc7fb7bbf34b750a7e19f6c697cc2 /src | |
| parent | ce1d49f055fe84049e1ab3bdc7969f3479796f6d (diff) | |
| download | small-utils-ba83a4e306a267048ad2fe39e8cf9128e6bde245.tar.gz small-utils-ba83a4e306a267048ad2fe39e8cf9128e6bde245.tar.bz2 | |
split_for_sort: Append mode implemented
There may be situations where not every input file is available at once,
so they can't all be handled in a single session. The append mode opens the
output files without overwriting their previous content, which makes the
tool more flexible to use in scripts.
Diffstat (limited to 'src')
| -rw-r--r-- | src/split_for_sort.c | 70 |
1 files changed, 54 insertions, 16 deletions
diff --git a/src/split_for_sort.c b/src/split_for_sort.c index 0dd8e32..5678f6d 100644 --- a/src/split_for_sort.c +++ b/src/split_for_sort.c @@ -10,6 +10,7 @@ */ #include <stdio.h> #include <stdlib.h> +#include <unistd.h> #include <string.h> #include <ctype.h> #include <errno.h> @@ -38,6 +39,9 @@ struct list_head { size_t length; }; + +/* DECLARATIONS */ + int push_into_list_unique(struct list_head *list, char *name); int split_into_files(struct list_head *list, char *inputfile, char *prefix); size_t list_check_length(struct list_head *list); @@ -45,12 +49,23 @@ struct list_head *extend_list(struct list_head *list, size_t splitlength, char * struct list_node *get_node(struct list_head *list, char *name); void destroy_list(struct list_head *list); void fflush_list(struct list_head *list); -void print_list(struct list_head *list, FILE *out); int set_nofile_limit_to_hard(); +#ifdef DEBUGBUILD +void print_list(struct list_head *list, FILE *out); +#endif + +/* GLOBAL VARIABLES */ +int option_append_mode = 0; -int set_nofile_limit_to_hard() { + +/* IMPLEMENTATION */ + +/* modern Linux distributions set the soft limit to 1024 file descriptors, + * this may not be sufficient, therefore increase to the hard limit. 
+ * */ +inline int set_nofile_limit_to_hard() { struct rlimit lim; if (getrlimit(RLIMIT_NOFILE , &lim) != 0) { LOGERR("ERROR: Failed to get RLIMIT_NOFILE: %s (errno %d)\n", @@ -67,7 +82,7 @@ int set_nofile_limit_to_hard() { } -void destroy_list(struct list_head *list) { +inline void destroy_list(struct list_head *list) { struct list_node *n = NULL, *b = NULL; if (list == NULL) return; @@ -82,7 +97,8 @@ void destroy_list(struct list_head *list) { free(list); } -void fflush_list(struct list_head *list) { + +inline void fflush_list(struct list_head *list) { struct list_node *n = NULL; if (list == NULL) return; n = list->first; @@ -94,6 +110,8 @@ void fflush_list(struct list_head *list) { } } + +#ifdef DEBUGBUILD void print_list(struct list_head *list, FILE *out) { struct list_node *n = NULL; if (list == NULL) @@ -107,6 +125,8 @@ void print_list(struct list_head *list, FILE *out) { n = n->next; } } +#endif + inline size_t list_check_length(struct list_head *list) { size_t l = 0; @@ -124,6 +144,7 @@ inline size_t list_check_length(struct list_head *list) { return l; } + inline int push_into_list_unique(struct list_head *list, char *name) { size_t name_length = 0; struct list_node *ptr = NULL, *tmp = NULL; @@ -158,6 +179,7 @@ inline int push_into_list_unique(struct list_head *list, char *name) { return 0; } + inline struct list_node *get_node(struct list_head *list, char *name) { struct list_node *n = NULL; if (list == NULL || name == NULL || name[0] == '\0') { @@ -175,6 +197,7 @@ inline struct list_node *get_node(struct list_head *list, char *name) { return NULL; } + /* Open the given file and extend / create a list of output filenames, * based on the read content. * splitlength - the length of a line considered regarding the comparision. 
@@ -243,6 +266,7 @@ struct list_head *extend_list(struct list_head *list, size_t splitlength, char * return list; } + /* Parse the inputfile and split it's content into the given list of targets * with the prefix for the name of the target. */ @@ -252,7 +276,6 @@ int split_into_files(struct list_head *list, char *inputfile, char *prefix) { size_t i = 0, len=0, sl = 0; struct list_node *node = NULL; - if (inputfile == NULL || strlen(inputfile) == 0) { LOGERR("ERROR: no valid filename for input.\n"); return -1; @@ -319,7 +342,7 @@ int split_into_files(struct list_head *list, char *inputfile, char *prefix) { /* keep the file descriptors open across the input files to be split */ if (node->fd == NULL) { snprintf(filename, BUFFERSIZE, "%s%s", ((prefix==NULL)?"":prefix), node->name); - if ((node->fd = fopen(filename, "w")) == NULL) { + if ((node->fd = fopen(filename, (option_append_mode)?"a":"w")) == NULL) { LOGERR("ERROR: Failed to open file '%s': %s\n", filename, strerror(errno)); free(line); free(line_head); @@ -350,33 +373,48 @@ int split_into_files(struct list_head *list, char *inputfile, char *prefix) { return 0; } + int main(int argc, char **argv) { - int i = 3; + int data_index = 2, length_index = 1, opt = 0, output_index = 0; size_t splitlength = 0; struct list_head *list = NULL; if (argc < 4) { - fprintf(stderr, "Usage: %s prefix length files...\n\n", argv[0]); - fprintf(stderr, "\tprefix - used with the output filenames\n" + fprintf(stderr, "Usage: %s prefix length files...\n", argv[0]); + fprintf(stderr, "or %s -a prefix length files...\n\n", argv[0]); + fprintf(stderr, "-a - don't overwrite existing files, instead append to those\n" + "\tprefix - used with the output filenames\n" "\tlength - the number of characters relevant for comparing\n"); return EXIT_FAILURE; } - /* modern Linux distributions set the soft limit to 1024 file descriptors, - * this may not be sufficient, therefore increase to the hard limit */ set_nofile_limit_to_hard(); - splitlength = 
strtoull(argv[2], NULL, 10); + while ((opt = getopt(argc, argv, "a")) != -1) { + switch (opt) { + case 'a': + option_append_mode = 1; + break; + default: + break; + }; + } + + data_index += optind; + length_index += optind; + output_index += optind; + + splitlength = strtoull(argv[length_index], NULL, 10); if (splitlength == 0) { - LOGERR("ERROR: Failed to read valid length from argument '%s' base 10 number >=1 expected\n", argv[2]); + LOGERR("ERROR: Failed to read valid length from argument '%s' base 10 number >=1 expected\n", argv[length_index]); return EXIT_FAILURE; } - for (i=3; i<argc; i++) { + for (; data_index<argc; data_index++) { // fprintf(stderr, "=> Scan '%s' for bucket filenames\n", argv[i]); - list = extend_list(list, splitlength, argv[i]); + list = extend_list(list, splitlength, argv[data_index]); // fprintf(stderr,"=> Split '%s' into bucket files\n", argv[i]); - if (split_into_files(list, argv[i], argv[1]) < 0) { + if (split_into_files(list, argv[data_index], argv[output_index]) < 0) { destroy_list(list); return EXIT_FAILURE; } |
