aboutsummaryrefslogtreecommitdiff
path: root/src/split_for_sort.c
diff options
context:
space:
mode:
authorThorsten Töpper <atsutane@freethoughts.de>2025-08-30 20:49:34 +0200
committerThorsten Töpper <atsutane@freethoughts.de>2025-08-30 20:49:34 +0200
commit9b5c9a9e6199a224061af8284216032202fc0d94 (patch)
treee7caa192bc67bd82d1e4e1131de86ea6d0aaa40f /src/split_for_sort.c
parentfba3371d4c82c95433ac1f09c1b498eaa8dd67d0 (diff)
downloadsmall-utils-9b5c9a9e6199a224061af8284216032202fc0d94.tar.gz
small-utils-9b5c9a9e6199a224061af8284216032202fc0d94.tar.bz2
split_for_sort: performance improvement
In the background distribute the metadata across 63 lists instead of a single one. a-z A-Z 0-9 _ are the possible bytes on which decision via modulo happens. TODO: check whether this makes sense or wasting memory with 256 is more effective.
Diffstat (limited to 'src/split_for_sort.c')
-rw-r--r--src/split_for_sort.c98
1 files changed, 62 insertions, 36 deletions
diff --git a/src/split_for_sort.c b/src/split_for_sort.c
index 68b7819..55f396f 100644
--- a/src/split_for_sort.c
+++ b/src/split_for_sort.c
@@ -21,6 +21,9 @@
/* Also used for paths => at least PATH_MAX */
#define BUFFERSIZE 4096
+/* performance optimization. distribute the content across #alnumchars+1 (the _),
+ * ((unsigned char)name[prefix_length])%LISTS_NR */
+#define LISTS_NR 63
/* The arguments will be
* PREFIX - file prefix...
@@ -33,10 +36,11 @@ struct list_node {
FILE *fd;
};
+
struct list_head {
- struct list_node *first;
+ struct list_node *first[LISTS_NR];
size_t splitlength;
- size_t length;
+ size_t length[LISTS_NR];
};
@@ -59,7 +63,7 @@ void print_list(struct list_head *list, FILE *out);
/* GLOBAL VARIABLES */
int option_append_mode = 0;
-
+size_t prefix_length = 0;
/* IMPLEMENTATION */
@@ -85,15 +89,18 @@ inline int set_nofile_limit_to_hard() {
inline void destroy_list(struct list_head *list) {
struct list_node *n = NULL, *b = NULL;
- if (list == NULL)
- return;
- n = list->first;
- while (n != NULL) {
- b = n->next;
- if (n->name != NULL) free(n->name);
- if (n->fd != NULL) fclose(n->fd);
- free(n);
- n = b;
+ int i = 0;
+ if (list == NULL) return;
+
+ for (; i<LISTS_NR; i++) {
+ n = list->first[i];
+ while (n != NULL) {
+ b = n->next;
+ if (n->name != NULL) free(n->name);
+ if (n->fd != NULL) fclose(n->fd);
+ free(n);
+ n = b;
+ }
}
free(list);
}
@@ -101,13 +108,16 @@ inline void destroy_list(struct list_head *list) {
inline void fflush_list(struct list_head *list) {
struct list_node *n = NULL;
+ int i = 0;
if (list == NULL) return;
- n = list->first;
- while (n != NULL) {
- if (n->fd != NULL) {
- fflush(n->fd);
+ for (; i<LISTS_NR; i++) {
+ n = list->first[i];
+ while (n != NULL) {
+ if (n->fd != NULL) {
+ fflush(n->fd);
+ }
+ n = n->next;
}
- n = n->next;
}
}
@@ -115,45 +125,57 @@ inline void fflush_list(struct list_head *list) {
#ifdef DEBUGBUILD
void print_list(struct list_head *list, FILE *out) {
struct list_node *n = NULL;
+ int i = 0;
+
if (list == NULL)
return;
if (out == NULL) out = stderr;
- fprintf(out, "length: %lu\n",list_check_length(list));
- n = list->first;
- while (n != NULL) {
- fprintf(out, "node '%s' %s\n", n->name,
- ((n->fd != NULL) ? "open file descriptor" : ""));
- n = n->next;
+
+ for (i=0; i<LISTS_NR; i++) {
+ n = list->first[i];
+ while (n != NULL) {
+ fprintf(out, "node '%s' %s\n", n->name,
+ ((n->fd != NULL) ? "open file descriptor" : ""));
+ n = n->next;
+ }
}
+ //fprintf(out, "length: %lu\n", list_check_length(list));
}
#endif
inline size_t list_check_length(struct list_head *list) {
- size_t l = 0;
+ int i = 0;
+ size_t l = 0, total = 0;
struct list_node *ptr = NULL;
if (list == NULL) {
LOGERR("ERROR: No list given.\n");
return 0;
}
- ptr = list->first;
- while (ptr != NULL) {
- l++;
- ptr = ptr->next;
+ for (i=0; i<LISTS_NR; i++) {
+ l = 0;
+ ptr = list->first[i];
+ while (ptr != NULL) {
+ l++;
+ ptr = ptr->next;
+ }
+ list->length[i] = l;
+ total += l;
}
- list->length = l;
- return l;
+ return total;
}
inline int push_into_list_unique(struct list_head *list, char *name) {
- size_t name_length = 0;
+ size_t name_length = 0, selected_list = 0;
struct list_node *ptr = NULL, *tmp = NULL;
if (list == NULL || name == NULL || name[0] == '\0') {
LOGERR("ERROR: Invalid function arguments.\n");
return -1;
}
- ptr = list->first;
+ selected_list = ((unsigned char)name[prefix_length])%LISTS_NR;
+
+ ptr = list->first[selected_list];
while (ptr != NULL) {
if (strcmp(ptr->name, name) == 0) {
return 0;
@@ -174,21 +196,23 @@ inline int push_into_list_unique(struct list_head *list, char *name) {
}
memcpy(tmp->name, name, name_length);
tmp->fd = NULL;
- tmp->next = list->first;
- list->first = tmp;
- list->length++;
+ tmp->next = list->first[selected_list];
+ list->first[selected_list] = tmp;
+ list->length[selected_list]++;
return 0;
}
inline struct list_node *get_node(struct list_head *list, char *name) {
+ size_t selected_list = 0;
struct list_node *n = NULL;
if (list == NULL || name == NULL || name[0] == '\0') {
LOGERR("ERROR: Invalid arguments\n");
return NULL;
}
+ selected_list = ((unsigned char)name[prefix_length])%LISTS_NR;
- n = list->first;
+ n = list->first[selected_list];
while (n != NULL) {
if (strcmp(n->name, name) == 0)
return n;
@@ -510,6 +534,8 @@ int main(int argc, char **argv) {
length_index += optind;
output_index += optind;
+ prefix_length = strlen(argv[output_index]);
+
splitlength = strtoull(argv[length_index], NULL, 10);
if (splitlength == 0) {
LOGERR("ERROR: Failed to read valid length from argument '%s' base 10 number >=1 expected\n", argv[length_index]);