aboutsummaryrefslogtreecommitdiff
path: root/src
diff options
context:
space:
mode:
authorThorsten Töpper <atsutane@freethoughts.de>2025-08-29 21:23:49 +0200
committerThorsten Töpper <atsutane@freethoughts.de>2025-08-29 21:23:49 +0200
commitba83a4e306a267048ad2fe39e8cf9128e6bde245 (patch)
tree10b6c3a7771cc7fb7bbf34b750a7e19f6c697cc2 /src
parentce1d49f055fe84049e1ab3bdc7969f3479796f6d (diff)
downloadsmall-utils-ba83a4e306a267048ad2fe39e8cf9128e6bde245.tar.gz
small-utils-ba83a4e306a267048ad2fe39e8cf9128e6bde245.tar.bz2
split_for_sort: Append mode implemented
There may be situations in which not every input file is available at once, so they cannot all be handled in a single session. Append mode opens the output files without overwriting their previous content, which makes the tool more flexible to use in scripts.
Diffstat (limited to 'src')
-rw-r--r--src/split_for_sort.c70
1 files changed, 54 insertions, 16 deletions
diff --git a/src/split_for_sort.c b/src/split_for_sort.c
index 0dd8e32..5678f6d 100644
--- a/src/split_for_sort.c
+++ b/src/split_for_sort.c
@@ -10,6 +10,7 @@
*/
#include <stdio.h>
#include <stdlib.h>
+#include <unistd.h>
#include <string.h>
#include <ctype.h>
#include <errno.h>
@@ -38,6 +39,9 @@ struct list_head {
size_t length;
};
+
+/* DECLARATIONS */
+
int push_into_list_unique(struct list_head *list, char *name);
int split_into_files(struct list_head *list, char *inputfile, char *prefix);
size_t list_check_length(struct list_head *list);
@@ -45,12 +49,23 @@ struct list_head *extend_list(struct list_head *list, size_t splitlength, char *
struct list_node *get_node(struct list_head *list, char *name);
void destroy_list(struct list_head *list);
void fflush_list(struct list_head *list);
-void print_list(struct list_head *list, FILE *out);
int set_nofile_limit_to_hard();
+#ifdef DEBUGBUILD
+void print_list(struct list_head *list, FILE *out);
+#endif
+
+/* GLOBAL VARIABLES */
+int option_append_mode = 0;
-int set_nofile_limit_to_hard() {
+
+/* IMPLEMENTATION */
+
+/* modern Linux distributions set the soft limit to 1024 file descriptors,
+ * this may not be sufficient, therefore increase to the hard limit.
+ * */
+inline int set_nofile_limit_to_hard() {
struct rlimit lim;
if (getrlimit(RLIMIT_NOFILE , &lim) != 0) {
LOGERR("ERROR: Failed to get RLIMIT_NOFILE: %s (errno %d)\n",
@@ -67,7 +82,7 @@ int set_nofile_limit_to_hard() {
}
-void destroy_list(struct list_head *list) {
+inline void destroy_list(struct list_head *list) {
struct list_node *n = NULL, *b = NULL;
if (list == NULL)
return;
@@ -82,7 +97,8 @@ void destroy_list(struct list_head *list) {
free(list);
}
-void fflush_list(struct list_head *list) {
+
+inline void fflush_list(struct list_head *list) {
struct list_node *n = NULL;
if (list == NULL) return;
n = list->first;
@@ -94,6 +110,8 @@ void fflush_list(struct list_head *list) {
}
}
+
+#ifdef DEBUGBUILD
void print_list(struct list_head *list, FILE *out) {
struct list_node *n = NULL;
if (list == NULL)
@@ -107,6 +125,8 @@ void print_list(struct list_head *list, FILE *out) {
n = n->next;
}
}
+#endif
+
inline size_t list_check_length(struct list_head *list) {
size_t l = 0;
@@ -124,6 +144,7 @@ inline size_t list_check_length(struct list_head *list) {
return l;
}
+
inline int push_into_list_unique(struct list_head *list, char *name) {
size_t name_length = 0;
struct list_node *ptr = NULL, *tmp = NULL;
@@ -158,6 +179,7 @@ inline int push_into_list_unique(struct list_head *list, char *name) {
return 0;
}
+
inline struct list_node *get_node(struct list_head *list, char *name) {
struct list_node *n = NULL;
if (list == NULL || name == NULL || name[0] == '\0') {
@@ -175,6 +197,7 @@ inline struct list_node *get_node(struct list_head *list, char *name) {
return NULL;
}
+
/* Open the given file and extend / create a list of output filenames,
* based on the read content.
* splitlength - the length of a line considered regarding the comparision.
@@ -243,6 +266,7 @@ struct list_head *extend_list(struct list_head *list, size_t splitlength, char *
return list;
}
+
/* Parse the inputfile and split it's content into the given list of targets
* with the prefix for the name of the target.
*/
@@ -252,7 +276,6 @@ int split_into_files(struct list_head *list, char *inputfile, char *prefix) {
size_t i = 0, len=0, sl = 0;
struct list_node *node = NULL;
-
if (inputfile == NULL || strlen(inputfile) == 0) {
LOGERR("ERROR: no valid filename for input.\n");
return -1;
@@ -319,7 +342,7 @@ int split_into_files(struct list_head *list, char *inputfile, char *prefix) {
/* keep the file descriptors open across the input files to be split */
if (node->fd == NULL) {
snprintf(filename, BUFFERSIZE, "%s%s", ((prefix==NULL)?"":prefix), node->name);
- if ((node->fd = fopen(filename, "w")) == NULL) {
+ if ((node->fd = fopen(filename, (option_append_mode)?"a":"w")) == NULL) {
LOGERR("ERROR: Failed to open file '%s': %s\n", filename, strerror(errno));
free(line);
free(line_head);
@@ -350,33 +373,48 @@ int split_into_files(struct list_head *list, char *inputfile, char *prefix) {
return 0;
}
+
int main(int argc, char **argv) {
- int i = 3;
+ int data_index = 2, length_index = 1, opt = 0, output_index = 0;
size_t splitlength = 0;
struct list_head *list = NULL;
if (argc < 4) {
- fprintf(stderr, "Usage: %s prefix length files...\n\n", argv[0]);
- fprintf(stderr, "\tprefix - used with the output filenames\n"
+ fprintf(stderr, "Usage: %s prefix length files...\n", argv[0]);
+ fprintf(stderr, "or %s -a prefix length files...\n\n", argv[0]);
+ fprintf(stderr, "-a - don't overwrite existing files, instead append to those\n"
+ "\tprefix - used with the output filenames\n"
"\tlength - the number of characters relevant for comparing\n");
return EXIT_FAILURE;
}
- /* modern Linux distributions set the soft limit to 1024 file descriptors,
- * this may not be sufficient, therefore increase to the hard limit */
set_nofile_limit_to_hard();
- splitlength = strtoull(argv[2], NULL, 10);
+ while ((opt = getopt(argc, argv, "a")) != -1) {
+ switch (opt) {
+ case 'a':
+ option_append_mode = 1;
+ break;
+ default:
+ break;
+ };
+ }
+
+ data_index += optind;
+ length_index += optind;
+ output_index += optind;
+
+ splitlength = strtoull(argv[length_index], NULL, 10);
if (splitlength == 0) {
- LOGERR("ERROR: Failed to read valid length from argument '%s' base 10 number >=1 expected\n", argv[2]);
+ LOGERR("ERROR: Failed to read valid length from argument '%s' base 10 number >=1 expected\n", argv[length_index]);
return EXIT_FAILURE;
}
- for (i=3; i<argc; i++) {
+ for (; data_index<argc; data_index++) {
// fprintf(stderr, "=> Scan '%s' for bucket filenames\n", argv[i]);
- list = extend_list(list, splitlength, argv[i]);
+ list = extend_list(list, splitlength, argv[data_index]);
// fprintf(stderr,"=> Split '%s' into bucket files\n", argv[i]);
- if (split_into_files(list, argv[i], argv[1]) < 0) {
+ if (split_into_files(list, argv[data_index], argv[output_index]) < 0) {
destroy_list(list);
return EXIT_FAILURE;
}