From 4642cf5b388bf60f6bea67ce3a5031d24bccd48a Mon Sep 17 00:00:00 2001 From: David Leonard Date: Sun, 20 Feb 2022 14:29:45 +1000 Subject: [PATCH] tsort: new applet function old new delta tsort_main - 578 +578 .rodata 104884 104906 +22 applet_names 2759 2765 +6 applet_main 1596 1600 +4 packed_usage 34290 34288 -2 ------------------------------------------------------------------------------ (add/remove: 2/0 grow/shrink: 3/1 up/down: 610/-2) Total: 608 bytes Signed-off-by: David Leonard Signed-off-by: Denys Vlasenko --- coreutils/tsort.c | 188 +++++++++++++++++++++++++++++++++++++ docs/posix_conformance.txt | 2 +- testsuite/tsort.tests | 110 ++++++++++++++++++++++ 3 files changed, 299 insertions(+), 1 deletion(-) create mode 100644 coreutils/tsort.c create mode 100755 testsuite/tsort.tests diff --git a/coreutils/tsort.c b/coreutils/tsort.c new file mode 100644 index 000000000..dedb65b15 --- /dev/null +++ b/coreutils/tsort.c @@ -0,0 +1,188 @@ +/* vi: set sw=4 ts=4: */ +/* + * tsort implementation for busybox + * + * public domain -- David Leonard, 2022 + */ +//config:config TSORT +//config: bool "tsort (0.7 kb)" +//config: default y +//config: help +//config: tsort performs a topological sort. + +//applet:IF_TSORT(APPLET(tsort, BB_DIR_USR_BIN, BB_SUID_DROP)) + +//kbuild:lib-$(CONFIG_TSORT) += tsort.o + +/* BB_AUDIT SUSv3 compliant */ +/* http://www.opengroup.org/onlinepubs/007904975/utilities/tsort.html */ + +//usage:#define tsort_trivial_usage +//usage: "[FILE]" +//usage:#define tsort_full_usage "\n\n" +//usage: "Topological sort" +//usage:#define tsort_example_usage +//usage: "$ echo -e \"a b\\nb c\" | tsort\n" +//usage: "a\n" +//usage: "b\n" +//usage: "c\n" + +#include "libbb.h" +#include "common_bufsiz.h" + +struct node { + unsigned in_count; + unsigned out_count; + struct node **out; + char name[1]; +}; + +struct globals { + struct node **nodes; + unsigned nodes_len; +}; +#define G (*(struct globals*)bb_common_bufsiz1) +#define INIT_G() do { \ + setup_common_bufsiz(); \ + BUILD_BUG_ON(sizeof(G) > COMMON_BUFSIZE); \ + G.nodes = NULL; \ + G.nodes_len = 0; \ +} while (0) + +static struct node * +get_node(const char *name) +{ + struct node *n; + unsigned a = 0; + unsigned b = G.nodes_len; + + /* Binary search for name */ + while (a != b) { + unsigned m = (a + b) / 2; + int cmp = strcmp(name, G.nodes[m]->name); + if (cmp == 0) + return G.nodes[m]; /* found */ + if (cmp < 0) { + b = m; + } else { + a = m + 1; + } + } + + /* Allocate new node */ + n = xzalloc(sizeof(*n) + strlen(name)); + //n->in_count = 0; + //n->out_count = 0; + //n->out = NULL; + strcpy(n->name, name); + + /* Insert to maintain sort */ + G.nodes = xrealloc(G.nodes, (G.nodes_len + 1) * sizeof(*G.nodes)); + memmove(&G.nodes[a + 1], &G.nodes[a], + (G.nodes_len - a) * sizeof(*G.nodes)); + G.nodes[a] = n; + G.nodes_len++; + return n; +} + +static void +add_edge(struct node *a, struct node *b) +{ + a->out = xrealloc_vector(a->out, 6, a->out_count); + a->out[a->out_count++] = b; + b->in_count++; +} + +int tsort_main(int argc, char **argv) MAIN_EXTERNALLY_VISIBLE; +int tsort_main(int argc UNUSED_PARAM, char **argv) +{ + char *line; + size_t linesz; + ssize_t len; + struct node *a; + int cycles; + + INIT_G(); + + if (argv[1]) { + if (argv[2]) + bb_show_usage(); + if (NOT_LONE_DASH(argv[1])) { + close(STDIN_FILENO); /* == 0 */ + xopen(argv[1], O_RDONLY); /* fd will be 0 */ + } + } + + /* Read in words separated by s */ + a = NULL; + line = NULL; + linesz = 0; + while ((len = getline(&line, &linesz, stdin)) != -1) { + char *s = line; + while (*(s = skip_whitespace(s)) != '\0') { + struct node *b; + char *word; + + word = s; + s = skip_non_whitespace(s); + if (*s) + *s++ = '\0'; + + /* Create nodes and edges for each word pair */ + b = get_node(word); + if (!a) { + a = b; + } else { + if (a != b) + add_edge(a, b); + a = NULL; + } + } + } +// Most other tools do not check for input read error (treat them as EOF) +// die_if_ferror(in, input_filename); + if (a) + bb_simple_error_msg_and_die("odd input"); + free(line); + + /* + * Kahn's algorithm: + * - find a node that has no incoming edges, print and remove it + * - repeat until the graph is empty + * - if any nodes are left, they form cycles. + */ + cycles = 0; + while (G.nodes_len) { + struct node *n; + unsigned i; + + /* Search for first node with no incoming edges */ + for (i = 0; i < G.nodes_len; i++) { + if (!G.nodes[i]->in_count) + break; + } + if (i == G.nodes_len) { + /* Must be a cycle; arbitraily break it at node 0 */ + cycles++; + i = 0; +#ifndef TINY + bb_error_msg("cycle at %s", G.nodes[i]->name); +#endif + } + + /* Remove the node (need no longer maintain sort) */ + n = G.nodes[i]; + G.nodes[i] = G.nodes[--G.nodes_len]; + + /* And remove its outgoing edges */ + for (i = 0; i < n->out_count; i++) + n->out[i]->in_count--; + free(n->out); + + puts(n->name); + free(n); + } + free(G.nodes); + + fflush_stdout_and_exit(cycles ? 1 : 0); +} diff --git a/docs/posix_conformance.txt b/docs/posix_conformance.txt index 5e107d74d..8edbe3e15 100644 --- a/docs/posix_conformance.txt +++ b/docs/posix_conformance.txt @@ -24,7 +24,7 @@ POSIX Tools not supported: gencat, getconf, iconv, join, link, locale, localedef, lp, m4, mailx, newgrp, nl, pathchk, pax, pr, qalter, qdel, qhold, qmove, qmsg, qrerun, qrls, qselect, qsig, qstat, qsub, tabs, talk, tput, - tsort, unlink, uucp, uustat, uux + unlink, uucp, uustat, uux POSIX Tools not supported (DEVELOPMENT): admin, cflow, ctags, cxref, delta, fort77, get, lex, make, nm, prs, rmdel, diff --git a/testsuite/tsort.tests b/testsuite/tsort.tests new file mode 100755 index 000000000..c6fe78272 --- /dev/null +++ b/testsuite/tsort.tests @@ -0,0 +1,110 @@ +#!/bin/sh + +# SUSv3 compliant sort tests. +# Public Domain, David Leonard 2022 + +. ./testing.sh + +# name cmd expected ./input stdin +testing "" "tsort" "a\n" "" "a a\n" +testing "" "tsort -" "a\n" "" "a a\n" +testing "" "tsort input" "a\n" "a a\n" "" +testing "tsort input (w/o eol)" "tsort input" "a\n" "a a" "" +testing "" "tsort /dev/null" "" "" "" + +testing "tsort empty" tsort "" "" "" +testing "tsort blank" tsort "" "" "\n" +testing "tsort blanks" tsort "" "" "\n\n \t\n " + +# simple inputs having exactly one solution +testing "tsort 1-edge" tsort "a\nb\n" "" "a b\n" +testing "tsort 2-edge" tsort "a\nb\nc\n" "" "a b b c\n" + + +# The following test helper accommodates future variable output because, as +# tsort is allowed to emit any total ordering that satisfies its input, +# should the implementation changes, these tests will remain valid. +# +# The idea is to verify that: +# - each input word is present EXACTLY ONCE in tsort's output +# - for each input pair 'a b', the occurrence of 'a' APPEARS BEFORE 'b' +# - the exit code is 0 + +tsort_test () { + fail= + name="$1"; shift + args="$*" + if [ $VERBOSE ]; then + echo "============" + echo "echo \"$args\" | tsort >actual" + fi + echo "$args" | tsort >actual + ec=$? + if [ $ec -ne 0 ]; then + fail "tsort exit $ec, expected 0" + fi + while [ $# -ne 0 ]; do + a=$1; shift + b=$1; shift + aline=$(grep -nxF "$a" /dev/null 2>/dev/null + ec=$? + if [ $ec -eq 0 ]; then + fail "$name: unexpected exit 0 ($*)" + fi + report "$name" +} + +fail () { + [ $VERBOSE ] && echo "ERROR: $*" + fail=1 +} + +report () { + if [ $fail ]; then + FAILCOUNT=$(($FAILCOUNT + 1)) + echo "FAIL: $*" + else + echo "PASS: $*" + fi +} + +tsort_test "tsort empty2" +tsort_test "tsort singleton" a a +tsort_test "tsort simple" a b b c +tsort_test "tsort 2singleton" a a b b +tsort_test "tsort medium" a b a b b c +tsort_test "tsort std.example" a b c c d e g g f g e f h h +tsort_test "tsort prefixes" a aa aa aaa aaaa aaaaa a aaaaa + +tsort_test_err "tsort odd" a +tsort_test_err "tsort odd2" a b c +tsort_test_err "tsort cycle" a b b a + +exit $FAILCOUNT