tsort: new applet

function old new delta tsort_main - 578 +578 .rodata 104884 104906 +22 applet_names 2759 2765 +6 applet_main 1596 1600 +4 packed_usage 34290 34288 -2 ------------------------------------------------------------------------------ (add/remove: 2/0 grow/shrink: 3/1 up/down: 610/-2) Total: 608 bytes Signed-off-by: David Leonard <d+busybox@adaptive-enterprises.com> Signed-off-by: Denys Vlasenko <vda.linux@googlemail.com>
2022-02-20 14:29:45 +10:00
parent 52a7bf6fa6
commit 4642cf5b38
3 changed files with 299 additions and 1 deletions
@@ -0,0 +1,188 @@
+/* vi: set sw=4 ts=4: */
+/*
+ * tsort implementation for busybox
+ *
+ * public domain -- David Leonard, 2022
+ */
+//config:config TSORT
+//config:	bool "tsort (0.7 kb)"
+//config:	default y
+//config:	help
+//config:	tsort performs a topological sort.
+
+//applet:IF_TSORT(APPLET(tsort, BB_DIR_USR_BIN, BB_SUID_DROP))
+
+//kbuild:lib-$(CONFIG_TSORT) += tsort.o
+
+/* BB_AUDIT SUSv3 compliant */
+/* http://www.opengroup.org/onlinepubs/007904975/utilities/tsort.html */
+
+//usage:#define tsort_trivial_usage
+//usage:       "[FILE]"
+//usage:#define tsort_full_usage "\n\n"
+//usage:       "Topological sort"
+//usage:#define tsort_example_usage
+//usage:       "$ echo -e \"a b\\nb c\" | tsort\n"
+//usage:       "a\n"
+//usage:       "b\n"
+//usage:       "c\n"
+
+#include "libbb.h"
+#include "common_bufsiz.h"
+
+struct node {
+	unsigned in_count;
+	unsigned out_count;
+	struct node **out;
+	char name[1];
+};
+
+struct globals {
+	struct node **nodes;
+	unsigned nodes_len;
+};
+#define G (*(struct globals*)bb_common_bufsiz1)
+#define INIT_G() do { \
+	setup_common_bufsiz(); \
+	BUILD_BUG_ON(sizeof(G) > COMMON_BUFSIZE); \
+	G.nodes = NULL; \
+	G.nodes_len = 0; \
+} while (0)
+
+static struct node *
+get_node(const char *name)
+{
+	struct node *n;
+	unsigned a = 0;
+	unsigned b = G.nodes_len;
+
+	/* Binary search for name */
+	while (a != b) {
+		unsigned m = (a + b) / 2;
+		int cmp = strcmp(name, G.nodes[m]->name);
+		if (cmp == 0)
+			return G.nodes[m]; /* found */
+		if (cmp < 0) {
+			b = m;
+		} else {
+			a = m + 1;
+		}
+	}
+
+	/* Allocate new node */
+	n = xzalloc(sizeof(*n) + strlen(name));
+	//n->in_count = 0;
+	//n->out_count = 0;
+	//n->out = NULL;
+	strcpy(n->name, name);
+
+	/* Insert to maintain sort */
+	G.nodes = xrealloc(G.nodes, (G.nodes_len + 1) * sizeof(*G.nodes));
+	memmove(&G.nodes[a + 1], &G.nodes[a],
+		(G.nodes_len - a) * sizeof(*G.nodes));
+	G.nodes[a] = n;
+	G.nodes_len++;
+	return n;
+}
+
+static void
+add_edge(struct node *a, struct node *b)
+{
+	a->out = xrealloc_vector(a->out, 6, a->out_count);
+	a->out[a->out_count++] = b;
+	b->in_count++;
+}
+
+int tsort_main(int argc, char **argv) MAIN_EXTERNALLY_VISIBLE;
+int tsort_main(int argc UNUSED_PARAM, char **argv)
+{
+	char *line;
+	size_t linesz;
+	ssize_t len;
+	struct node *a;
+	int cycles;
+
+	INIT_G();
+
+	if (argv[1]) {
+		if (argv[2])
+			bb_show_usage();
+		if (NOT_LONE_DASH(argv[1])) {
+			close(STDIN_FILENO); /* == 0 */
+			xopen(argv[1], O_RDONLY); /* fd will be 0 */
+		}
+	}
+
+	/* Read in words separated by <blank>s */
+	a = NULL;
+	line = NULL;
+	linesz = 0;
+	while ((len = getline(&line, &linesz, stdin)) != -1) {
+		char *s = line;
+		while (*(s = skip_whitespace(s)) != '\0') {
+			struct node *b;
+			char *word;
+
+			word = s;
+			s = skip_non_whitespace(s);
+			if (*s)
+				*s++ = '\0';
+
+			/* Create nodes and edges for each word pair */
+			b = get_node(word);
+			if (!a) {
+				a = b;
+			} else {
+				if (a != b)
+					add_edge(a, b);
+				a = NULL;
+			}
+		}
+	}
+// Most other tools do not check for input read error (treat them as EOF)
+//	die_if_ferror(in, input_filename);
+	if (a)
+		bb_simple_error_msg_and_die("odd input");
+	free(line);
+
+	/*
+	 * Kahn's algorithm:
+	 *   - find a node that has no incoming edges, print and remove it
+	 *   - repeat until the graph is empty
+	 *   - if any nodes are left, they form cycles.
+	 */
+	cycles = 0;
+	while (G.nodes_len) {
+		struct node *n;
+		unsigned i;
+
+		/* Search for first node with no incoming edges */
+		for (i = 0; i < G.nodes_len; i++) {
+			if (!G.nodes[i]->in_count)
+				break;
+		}
+		if (i == G.nodes_len) {
+			/* Must be a cycle; arbitraily break it at node 0 */
+			cycles++;
+			i = 0;
+#ifndef TINY
+			bb_error_msg("cycle at %s", G.nodes[i]->name);
+#endif
+		}
+
+		/* Remove the node (need no longer maintain sort) */
+		n = G.nodes[i];
+		G.nodes[i] = G.nodes[--G.nodes_len];
+
+		/* And remove its outgoing edges */
+		for (i = 0; i < n->out_count; i++)
+			n->out[i]->in_count--;
+		free(n->out);
+
+		puts(n->name);
+		free(n);
+	}
+	free(G.nodes);
+
+	fflush_stdout_and_exit(cycles ? 1 : 0);
+}
@@ -24,7 +24,7 @@ POSIX Tools not supported:
  gencat, getconf, iconv, join, link, locale, localedef, lp, m4,
  mailx, newgrp, nl, pathchk, pax, pr, qalter, qdel, qhold, qmove,
  qmsg, qrerun, qrls, qselect, qsig, qstat, qsub, tabs, talk, tput,
-  tsort, unlink, uucp, uustat, uux
+  unlink, uucp, uustat, uux

 POSIX Tools not supported (DEVELOPMENT):
  admin, cflow, ctags, cxref, delta, fort77, get, lex, make, nm, prs, rmdel,
@@ -0,0 +1,110 @@
+#!/bin/sh
+
+# SUSv3 compliant sort tests.
+# Public Domain, David Leonard 2022
+
+. ./testing.sh
+
+#       name cmd expected ./input stdin
+testing "" "tsort"       "a\n" "" "a a\n"
+testing "" "tsort -"     "a\n" "" "a a\n"
+testing "" "tsort input" "a\n" "a a\n" ""
+testing "tsort input (w/o eol)" "tsort input" "a\n" "a a" ""
+testing "" "tsort /dev/null" "" "" ""
+
+testing "tsort empty" tsort "" "" ""
+testing "tsort blank" tsort "" "" "\n"
+testing "tsort blanks" tsort "" "" "\n\n \t\n "
+
+# simple inputs having exactly one solution
+testing "tsort 1-edge" tsort "a\nb\n" "" "a b\n"
+testing "tsort 2-edge" tsort "a\nb\nc\n" "" "a b b c\n"
+
+
+# The following test helper accommodates future variable output because, as
+# tsort is allowed to emit any total ordering that satisfies its input,
+# should the implementation changes, these tests will remain valid.
+#
+# The idea is to verify that:
+# - each input word is present EXACTLY ONCE in tsort's output
+# - for each input pair 'a b', the occurrence of 'a' APPEARS BEFORE 'b'
+# - the exit code is 0
+
+tsort_test () {
+	fail=
+	name="$1"; shift
+	args="$*"
+	if [ $VERBOSE ]; then
+		echo "============"
+		echo "echo \"$args\" | tsort >actual"
+	fi
+	echo "$args" | tsort >actual
+	ec=$?
+	if [ $ec -ne 0 ]; then
+		fail "tsort exit $ec, expected 0"
+	fi
+	while [ $# -ne 0 ]; do
+		a=$1; shift
+		b=$1; shift
+		aline=$(grep -nxF "$a" <actual | cut -d: -f1)
+		bline=$(grep -nxF "$b" <actual | cut -d: -f1)
+		case $aline in
+			"") fail "word $a missing from output ($args)";;
+			*" "*) fail "word $a duplicated ($args)";;
+		esac
+		case $bline in
+			"") fail "word $b missing from output ($args)";;
+			*" "*) fail "word $b duplicated ($args)";;
+		esac
+		if [ $aline -gt $bline ]; then
+			fail "$a appears after $b ($args)"
+		fi
+	done
+	if [ $fail ] && [ $VERBOSE ]; then
+		echo "exit $ec, actual:"
+		cat actual
+	fi
+	rm actual
+	report "$name"
+}
+
+# Test that erroneous input causes an unsuccessful exit code
+# we don't test the output error message
+tsort_test_err () {
+	fail=
+	name="$1"; shift
+	echo "$*" | tsort >/dev/null 2>/dev/null
+	ec=$?
+	if [ $ec -eq 0 ]; then
+		fail "$name: unexpected exit 0 ($*)"
+	fi
+	report "$name"
+}
+
+fail () {
+	[ $VERBOSE ] && echo "ERROR: $*"
+	fail=1
+}
+
+report () {
+	if [ $fail ]; then
+		FAILCOUNT=$(($FAILCOUNT + 1))
+		echo "FAIL: $*"
+	else
+		echo "PASS: $*"
+	fi
+}
+
+tsort_test "tsort empty2"
+tsort_test "tsort singleton"    a a
+tsort_test "tsort simple"       a b b c
+tsort_test "tsort 2singleton"   a a b b
+tsort_test "tsort medium"       a b a b b c
+tsort_test "tsort std.example"  a b c c d e g g f g e f h h
+tsort_test "tsort prefixes"     a aa aa aaa aaaa aaaaa a aaaaa
+
+tsort_test_err "tsort odd"      a
+tsort_test_err "tsort odd2"     a b c
+tsort_test_err "tsort cycle"    a b b a
+
+exit $FAILCOUNT