aboutsummaryrefslogtreecommitdiff
path: root/src
diff options
context:
space:
mode:
authorThorsten Töpper <atsutane@freethoughts.de>2025-08-30 18:12:28 +0200
committerThorsten Töpper <atsutane@freethoughts.de>2025-08-30 18:12:28 +0200
commitfba3371d4c82c95433ac1f09c1b498eaa8dd67d0 (patch)
tree9fdbfa6a385c468aeeb33e23043b291a1ed2fe3a /src
parentba83a4e306a267048ad2fe39e8cf9128e6bde245 (diff)
downloadsmall-utils-fba3371d4c82c95433ac1f09c1b498eaa8dd67d0.tar.gz
small-utils-fba3371d4c82c95433ac1f09c1b498eaa8dd67d0.tar.bz2
split_for_sort: handle filename - as stdin
Diffstat (limited to 'src')
-rw-r--r--src/split_for_sort.c132
1 files changed, 125 insertions, 7 deletions
diff --git a/src/split_for_sort.c b/src/split_for_sort.c
index 5678f6d..68b7819 100644
--- a/src/split_for_sort.c
+++ b/src/split_for_sort.c
@@ -45,6 +45,7 @@ struct list_head {
int push_into_list_unique(struct list_head *list, char *name);
int split_into_files(struct list_head *list, char *inputfile, char *prefix);
size_t list_check_length(struct list_head *list);
+struct list_head *stdin_handling(struct list_head *list, size_t splitlength, char *prefix);
struct list_head *extend_list(struct list_head *list, size_t splitlength, char *fname);
struct list_node *get_node(struct list_head *list, char *name);
void destroy_list(struct list_head *list);
@@ -373,11 +374,116 @@ int split_into_files(struct list_head *list, char *inputfile, char *prefix) {
return 0;
}
+/* Build the bucket list and split the input in one pass over stdin.
+ *
+ * This covers extend_list and split_into_files for the stdin stream. Those
+ * two functions can't be used here: extend_list would consume the data from
+ * stdin and split_into_files would have nothing left to work with.
+ *
+ * list        - list to extend, or NULL to have a new one allocated here
+ * splitlength - number of leading characters of a line that form its
+ *               bucket name (non-alphanumeric characters become '_')
+ * prefix      - prepended to every bucket file name, may be NULL
+ *
+ * Returns the list (the one passed in, or the newly allocated one) on
+ * success and NULL on failure. On failure a list handed in by the caller is
+ * NOT destroyed, so the caller can still release it; only a list allocated
+ * by this function is destroyed here.
+ */
+struct list_head *stdin_handling(struct list_head *list, size_t splitlength, char *prefix) {
+    FILE *fdin = stdin;
+    char *filename = NULL, *line = NULL, *line_head = NULL, *previous = NULL;
+    size_t i = 0, len = 0;
+    int owns_list = 0;
+    struct list_head *result = NULL;
+    struct list_node *node = NULL;
+
+    line = calloc(BUFFERSIZE, sizeof(char));
+    line_head = calloc(BUFFERSIZE, sizeof(char));
+    previous = calloc(BUFFERSIZE, sizeof(char));
+    if (line == NULL || line_head == NULL || previous == NULL) {
+        LOGERR("ERROR: Failed to allocate memory for read buffer\n");
+        goto cleanup;
+    }
+    if ((filename = calloc(BUFFERSIZE, sizeof(char))) == NULL) {
+        LOGERR("ERROR: Failed to allocate memory for input buffer.\n");
+        goto cleanup;
+    }
+
+    if (list == NULL) {
+        if ((list = calloc(1, sizeof(struct list_head))) == NULL) {
+            LOGERR("ERROR: Failed to create new list\n");
+            goto cleanup;
+        }
+        /* remember that this function has to clean the list up on failure */
+        owns_list = 1;
+    }
+    list->splitlength = splitlength;
+
+    while (fgets(line, BUFFERSIZE, fdin) != NULL) {
+        len = strlen(line);
+        len = (splitlength < len) ? splitlength : len;
+        /* most simple way to stick with FS compatible characters;
+         * the cast keeps isalnum defined for bytes >= 0x80 */
+        for (i = 0; i < len; i++) {
+            line_head[i] = isalnum((unsigned char)line[i]) ? line[i] : '_';
+        }
+        line_head[len] = '\0';
+
+        /* Look the bucket up again whenever the head changes, and also
+         * whenever there is no usable node (nothing resolved yet, or the
+         * previous lookup failed) so node is never dereferenced as NULL. */
+        if (node == NULL || strncmp(line_head, previous, splitlength) != 0) {
+            if (push_into_list_unique(list, line_head) != 0) {
+                LOGERR("WARNING: Failed to insert '%s' into list. (file %s)\n", line, "stdin");
+                continue;
+            }
+
+            node = get_node(list, line_head);
+            if (node == NULL) {
+                LOGERR("WARNING: no node found for %s - skip line\n", line_head);
+                continue;
+            }
+
+            /* keep the file descriptors open across the input files to be split */
+            if (node->fd == NULL) {
+                snprintf(filename, BUFFERSIZE, "%s%s", ((prefix==NULL)?"":prefix), node->name);
+                if ((node->fd = fopen(filename, (option_append_mode)?"a":"w")) == NULL) {
+                    LOGERR("ERROR: Failed to open file '%s': %s\n", filename, strerror(errno));
+                    goto cleanup;
+                }
+            }
+            /* len < BUFFERSIZE always holds, unlike splitlength, so this
+             * copy (including the terminator) cannot overrun previous */
+            memcpy(previous, line_head, len + 1);
+        }
+
+        if (fputs(line, node->fd) == EOF) {
+            LOGERR("ERROR: Failed to write into output file for '%s': %s\n",
+                    line_head, strerror(errno));
+            goto cleanup;
+        }
+    }
+
+    result = list;
+
+cleanup:
+    /* a caller-provided list stays alive for the caller to destroy */
+    if (result == NULL && owns_list) {
+        destroy_list(list);
+    }
+    free(line);
+    free(line_head);
+    free(previous);
+    free(filename);
+
+    return result;
+}
int main(int argc, char **argv) {
int data_index = 2, length_index = 1, opt = 0, output_index = 0;
size_t splitlength = 0;
- struct list_head *list = NULL;
+ struct list_head *list = NULL, *lsttmp = NULL;
if (argc < 4) {
fprintf(stderr, "Usage: %s prefix length files...\n", argv[0]);
@@ -411,12 +517,24 @@ int main(int argc, char **argv) {
}
for (; data_index<argc; data_index++) {
-// fprintf(stderr, "=> Scan '%s' for bucket filenames\n", argv[i]);
- list = extend_list(list, splitlength, argv[data_index]);
-// fprintf(stderr,"=> Split '%s' into bucket files\n", argv[i]);
- if (split_into_files(list, argv[data_index], argv[output_index]) < 0) {
- destroy_list(list);
- return EXIT_FAILURE;
+ if (argv[data_index][0] == '-' && argv[data_index][1] == '\0') {
+ lsttmp = stdin_handling(list, splitlength, argv[output_index]);
+ if (lsttmp == NULL) {
+ destroy_list(list);
+ return EXIT_FAILURE;
+ }
+ list = lsttmp;
+ } else {
+ lsttmp = extend_list(list, splitlength, argv[data_index]);
+ if (lsttmp == NULL) {
+ destroy_list(list);
+ return EXIT_FAILURE;
+ }
+ list = lsttmp;
+ if (split_into_files(list, argv[data_index], argv[output_index]) < 0) {
+ destroy_list(list);
+ return EXIT_FAILURE;
+ }
}
}