tsort: new applet

function                                             old     new   delta
tsort_main                                             -     578    +578
.rodata                                           104884  104906     +22
applet_names                                        2759    2765      +6
applet_main                                         1596    1600      +4
packed_usage                                       34290   34288      -2
------------------------------------------------------------------------------
(add/remove: 2/0 grow/shrink: 3/1 up/down: 610/-2)            Total: 608 bytes

Signed-off-by: David Leonard <d+busybox@adaptive-enterprises.com>
Signed-off-by: Denys Vlasenko <vda.linux@googlemail.com>
This commit is contained in:
David Leonard 2022-02-20 14:29:45 +10:00 committed by Denys Vlasenko
parent 52a7bf6fa6
commit 4642cf5b38
3 changed files with 299 additions and 1 deletions

188
coreutils/tsort.c Normal file
View File

@ -0,0 +1,188 @@
/* vi: set sw=4 ts=4: */
/*
* tsort implementation for busybox
*
* public domain -- David Leonard, 2022
*/
//config:config TSORT
//config: bool "tsort (0.7 kb)"
//config: default y
//config: help
//config: tsort performs a topological sort.
//applet:IF_TSORT(APPLET(tsort, BB_DIR_USR_BIN, BB_SUID_DROP))
//kbuild:lib-$(CONFIG_TSORT) += tsort.o
/* BB_AUDIT SUSv3 compliant */
/* http://www.opengroup.org/onlinepubs/007904975/utilities/tsort.html */
//usage:#define tsort_trivial_usage
//usage: "[FILE]"
//usage:#define tsort_full_usage "\n\n"
//usage: "Topological sort"
//usage:#define tsort_example_usage
//usage: "$ echo -e \"a b\\nb c\" | tsort\n"
//usage: "a\n"
//usage: "b\n"
//usage: "c\n"
#include "libbb.h"
#include "common_bufsiz.h"
struct node {
unsigned in_count;
unsigned out_count;
struct node **out;
char name[1];
};
struct globals {
struct node **nodes;
unsigned nodes_len;
};
#define G (*(struct globals*)bb_common_bufsiz1)
#define INIT_G() do { \
setup_common_bufsiz(); \
BUILD_BUG_ON(sizeof(G) > COMMON_BUFSIZE); \
G.nodes = NULL; \
G.nodes_len = 0; \
} while (0)
static struct node *
get_node(const char *name)
{
struct node *n;
unsigned a = 0;
unsigned b = G.nodes_len;
/* Binary search for name */
while (a != b) {
unsigned m = (a + b) / 2;
int cmp = strcmp(name, G.nodes[m]->name);
if (cmp == 0)
return G.nodes[m]; /* found */
if (cmp < 0) {
b = m;
} else {
a = m + 1;
}
}
/* Allocate new node */
n = xzalloc(sizeof(*n) + strlen(name));
//n->in_count = 0;
//n->out_count = 0;
//n->out = NULL;
strcpy(n->name, name);
/* Insert to maintain sort */
G.nodes = xrealloc(G.nodes, (G.nodes_len + 1) * sizeof(*G.nodes));
memmove(&G.nodes[a + 1], &G.nodes[a],
(G.nodes_len - a) * sizeof(*G.nodes));
G.nodes[a] = n;
G.nodes_len++;
return n;
}
static void
add_edge(struct node *a, struct node *b)
{
a->out = xrealloc_vector(a->out, 6, a->out_count);
a->out[a->out_count++] = b;
b->in_count++;
}
int tsort_main(int argc, char **argv) MAIN_EXTERNALLY_VISIBLE;
int tsort_main(int argc UNUSED_PARAM, char **argv)
{
char *line;
size_t linesz;
ssize_t len;
struct node *a;
int cycles;
INIT_G();
if (argv[1]) {
if (argv[2])
bb_show_usage();
if (NOT_LONE_DASH(argv[1])) {
close(STDIN_FILENO); /* == 0 */
xopen(argv[1], O_RDONLY); /* fd will be 0 */
}
}
/* Read in words separated by <blank>s */
a = NULL;
line = NULL;
linesz = 0;
while ((len = getline(&line, &linesz, stdin)) != -1) {
char *s = line;
while (*(s = skip_whitespace(s)) != '\0') {
struct node *b;
char *word;
word = s;
s = skip_non_whitespace(s);
if (*s)
*s++ = '\0';
/* Create nodes and edges for each word pair */
b = get_node(word);
if (!a) {
a = b;
} else {
if (a != b)
add_edge(a, b);
a = NULL;
}
}
}
// Most other tools do not check for input read error (treat them as EOF)
// die_if_ferror(in, input_filename);
if (a)
bb_simple_error_msg_and_die("odd input");
free(line);
/*
* Kahn's algorithm:
* - find a node that has no incoming edges, print and remove it
* - repeat until the graph is empty
* - if any nodes are left, they form cycles.
*/
cycles = 0;
while (G.nodes_len) {
struct node *n;
unsigned i;
/* Search for first node with no incoming edges */
for (i = 0; i < G.nodes_len; i++) {
if (!G.nodes[i]->in_count)
break;
}
if (i == G.nodes_len) {
/* Must be a cycle; arbitraily break it at node 0 */
cycles++;
i = 0;
#ifndef TINY
bb_error_msg("cycle at %s", G.nodes[i]->name);
#endif
}
/* Remove the node (need no longer maintain sort) */
n = G.nodes[i];
G.nodes[i] = G.nodes[--G.nodes_len];
/* And remove its outgoing edges */
for (i = 0; i < n->out_count; i++)
n->out[i]->in_count--;
free(n->out);
puts(n->name);
free(n);
}
free(G.nodes);
fflush_stdout_and_exit(cycles ? 1 : 0);
}

View File

@ -24,7 +24,7 @@ POSIX Tools not supported:
gencat, getconf, iconv, join, link, locale, localedef, lp, m4,
mailx, newgrp, nl, pathchk, pax, pr, qalter, qdel, qhold, qmove,
qmsg, qrerun, qrls, qselect, qsig, qstat, qsub, tabs, talk, tput,
tsort, unlink, uucp, uustat, uux
unlink, uucp, uustat, uux
POSIX Tools not supported (DEVELOPMENT):
admin, cflow, ctags, cxref, delta, fort77, get, lex, make, nm, prs, rmdel,

110
testsuite/tsort.tests Executable file
View File

@ -0,0 +1,110 @@
#!/bin/sh
# SUSv3 compliant sort tests.
# Public Domain, David Leonard 2022
. ./testing.sh
# name cmd expected ./input stdin
testing "" "tsort" "a\n" "" "a a\n"
testing "" "tsort -" "a\n" "" "a a\n"
testing "" "tsort input" "a\n" "a a\n" ""
testing "tsort input (w/o eol)" "tsort input" "a\n" "a a" ""
testing "" "tsort /dev/null" "" "" ""
testing "tsort empty" tsort "" "" ""
testing "tsort blank" tsort "" "" "\n"
testing "tsort blanks" tsort "" "" "\n\n \t\n "
# simple inputs having exactly one solution
testing "tsort 1-edge" tsort "a\nb\n" "" "a b\n"
testing "tsort 2-edge" tsort "a\nb\nc\n" "" "a b b c\n"
# The following test helper accommodates future variable output because, as
# tsort is allowed to emit any total ordering that satisfies its input,
# should the implementation changes, these tests will remain valid.
#
# The idea is to verify that:
# - each input word is present EXACTLY ONCE in tsort's output
# - for each input pair 'a b', the occurrence of 'a' APPEARS BEFORE 'b'
# - the exit code is 0
tsort_test () {
fail=
name="$1"; shift
args="$*"
if [ $VERBOSE ]; then
echo "============"
echo "echo \"$args\" | tsort >actual"
fi
echo "$args" | tsort >actual
ec=$?
if [ $ec -ne 0 ]; then
fail "tsort exit $ec, expected 0"
fi
while [ $# -ne 0 ]; do
a=$1; shift
b=$1; shift
aline=$(grep -nxF "$a" <actual | cut -d: -f1)
bline=$(grep -nxF "$b" <actual | cut -d: -f1)
case $aline in
"") fail "word $a missing from output ($args)";;
*" "*) fail "word $a duplicated ($args)";;
esac
case $bline in
"") fail "word $b missing from output ($args)";;
*" "*) fail "word $b duplicated ($args)";;
esac
if [ $aline -gt $bline ]; then
fail "$a appears after $b ($args)"
fi
done
if [ $fail ] && [ $VERBOSE ]; then
echo "exit $ec, actual:"
cat actual
fi
rm actual
report "$name"
}
# Test that erroneous input causes an unsuccessful exit code
# we don't test the output error message
tsort_test_err () {
fail=
name="$1"; shift
echo "$*" | tsort >/dev/null 2>/dev/null
ec=$?
if [ $ec -eq 0 ]; then
fail "$name: unexpected exit 0 ($*)"
fi
report "$name"
}
fail () {
[ $VERBOSE ] && echo "ERROR: $*"
fail=1
}
report () {
if [ $fail ]; then
FAILCOUNT=$(($FAILCOUNT + 1))
echo "FAIL: $*"
else
echo "PASS: $*"
fi
}
tsort_test "tsort empty2"
tsort_test "tsort singleton" a a
tsort_test "tsort simple" a b b c
tsort_test "tsort 2singleton" a a b b
tsort_test "tsort medium" a b a b b c
tsort_test "tsort std.example" a b c c d e g g f g e f h h
tsort_test "tsort prefixes" a aa aa aaa aaaa aaaaa a aaaaa
tsort_test_err "tsort odd" a
tsort_test_err "tsort odd2" a b c
tsort_test_err "tsort cycle" a b b a
exit $FAILCOUNT