2014-03-05 18:56:20 +01:00
|
|
|
/* vi: set sw=4 ts=4: */
|
|
|
|
/*
|
|
|
|
* shuf: Write a random permutation of the input lines to standard output.
|
|
|
|
*
|
|
|
|
* Copyright (C) 2014 by Bartosz Golaszewski <bartekgola@gmail.com>
|
|
|
|
*
|
|
|
|
* Licensed under GPLv2 or later, see file LICENSE in this source tree.
|
|
|
|
*/
|
|
|
|
//config:config SHUF
|
2017-07-18 22:01:24 +02:00
|
|
|
//config: bool "shuf (5.4 kb)"
|
2014-03-05 18:56:20 +01:00
|
|
|
//config: default y
|
|
|
|
//config: help
|
2017-07-21 09:50:55 +02:00
|
|
|
//config: Generate random permutations
|
2014-03-05 18:56:20 +01:00
|
|
|
|
|
|
|
//applet:IF_SHUF(APPLET_NOEXEC(shuf, shuf, BB_DIR_USR_BIN, BB_SUID_DROP, shuf))
|
|
|
|
|
2017-09-18 16:28:43 +02:00
|
|
|
//kbuild:lib-$(CONFIG_SHUF) += shuf.o
|
|
|
|
|
2014-03-05 18:56:20 +01:00
|
|
|
//usage:#define shuf_trivial_usage
|
2021-08-23 15:48:22 +02:00
|
|
|
//usage: "[-n NUM] [-o FILE] [-z] [FILE | -e [ARG...] | -i L-H]"
|
2014-03-05 18:56:20 +01:00
|
|
|
//usage:#define shuf_full_usage "\n\n"
|
2014-03-07 14:41:53 +01:00
|
|
|
//usage: "Randomly permute lines\n"
|
2014-03-05 18:56:20 +01:00
|
|
|
//usage: "\n -n NUM Output at most NUM lines"
|
2014-03-07 14:41:53 +01:00
|
|
|
//usage: "\n -o FILE Write to FILE, not standard output"
|
2021-08-07 09:41:49 +01:00
|
|
|
//usage: "\n -z NUL terminated output"
|
2021-08-23 15:48:22 +02:00
|
|
|
//usage: "\n -e Treat ARGs as lines"
|
|
|
|
//usage: "\n -i L-H Treat numbers L-H as lines"
|
2014-03-05 18:56:20 +01:00
|
|
|
|
|
|
|
#include "libbb.h"
|
|
|
|
|
|
|
|
/* This is a NOEXEC applet. Be very careful! */
|
|
|
|
|
|
|
|
/*
 * Option bit masks tested against the value returned by getopt32().
 * Bit positions follow the order of the option letters in OPT_STR.
 */
enum {
	OPT_e = 1 << 0,
	OPT_i = 1 << 1,
	OPT_n = 1 << 2,
	OPT_o = 1 << 3,
	OPT_z = 1 << 4,
};
/* Kept as a macro: it is concatenated into the getopt32() option spec */
#define OPT_STR "ei:n:o:z"
|
|
|
|
|
|
|
|
/*
 * Use the Fisher-Yates shuffle algorithm on an array of lines.
 *
 * lines    - array of numlines pointers; permuted in place
 * numlines - number of elements in lines[]
 * outlines - number of elements the caller intends to output
 *
 * Every iteration picks a random element among the first numlines
 * entries, swaps it into the last slot of that range and shrinks the
 * range by one. On return, the final 'outlines' entries of the original
 * array hold the selected lines.
 * If the required number of output lines is less than the total
 * we can stop shuffling early.
 */
static void shuffle_lines(char **lines, unsigned numlines, unsigned outlines)
{
	srand(monotonic_us());

	/*
	 * Never try to select more lines than exist: otherwise numlines
	 * would reach zero while the loop is still running and
	 * "r %= numlines" below would divide by zero.
	 */
	if (outlines > numlines)
		outlines = numlines;

	while (outlines != 0) {
		char *tmp;
		unsigned r = rand();
		/* RAND_MAX can be as small as 32767 */
		if (numlines > RAND_MAX) {
			/*
			 * Shift as unsigned: rand() returns a (signed) int, and
			 * left-shifting it into or past the sign bit is undefined.
			 */
			r ^= (unsigned)rand() << 15;
		}
		r %= numlines;
//TODO: the above method is seriously non-uniform when numlines is very large.
//For example, with numlines of 0xf0000000,
//values of (r % numlines) in [0, 0x0fffffff] range
//are more likely: e.g. r=1 and r=0xf0000001 both map to 1,
//whereas only one value, r=0xefffffff, maps to 0xefffffff.
		numlines--;
		/* swap the chosen element into the last slot of the shrinking range */
		tmp = lines[numlines];
		lines[numlines] = lines[r];
		lines[r] = tmp;
		outlines--;
	}
}
|
|
|
|
|
|
|
|
int shuf_main(int argc, char **argv) MAIN_EXTERNALLY_VISIBLE;
|
|
|
|
int shuf_main(int argc, char **argv)
|
|
|
|
{
|
|
|
|
unsigned opts;
|
|
|
|
char *opt_i_str, *opt_n_str, *opt_o_str;
|
|
|
|
char **lines;
|
2021-09-04 17:00:22 +02:00
|
|
|
unsigned long long lo = lo;
|
2021-08-07 09:41:49 +01:00
|
|
|
unsigned numlines, outlines;
|
2021-09-04 17:00:22 +02:00
|
|
|
unsigned i;
|
2014-03-05 18:56:20 +01:00
|
|
|
char eol;
|
|
|
|
|
2017-08-08 21:55:02 +02:00
|
|
|
opts = getopt32(argv, "^"
|
|
|
|
OPT_STR
|
|
|
|
"\0" "e--i:i--e"/* mutually exclusive */,
|
|
|
|
&opt_i_str, &opt_n_str, &opt_o_str
|
|
|
|
);
|
2014-03-05 18:56:20 +01:00
|
|
|
|
|
|
|
argc -= optind;
|
|
|
|
argv += optind;
|
|
|
|
|
|
|
|
/* Prepare lines for shuffling - either: */
|
|
|
|
if (opts & OPT_e) {
|
|
|
|
/* make lines from command-line arguments */
|
|
|
|
numlines = argc;
|
|
|
|
lines = argv;
|
|
|
|
} else
|
|
|
|
if (opts & OPT_i) {
|
|
|
|
/* create a range of numbers */
|
2021-09-04 17:00:22 +02:00
|
|
|
unsigned long long hi;
|
2014-03-05 18:56:20 +01:00
|
|
|
char *dash;
|
|
|
|
|
2021-08-23 15:52:34 +02:00
|
|
|
if (argv[0])
|
|
|
|
bb_show_usage();
|
|
|
|
|
2014-03-05 18:56:20 +01:00
|
|
|
dash = strchr(opt_i_str, '-');
|
|
|
|
if (!dash) {
|
|
|
|
bb_error_msg_and_die("bad range '%s'", opt_i_str);
|
|
|
|
}
|
|
|
|
*dash = '\0';
|
2021-09-04 17:00:22 +02:00
|
|
|
lo = xatoull(opt_i_str);
|
|
|
|
hi = xatoull(dash + 1);
|
2015-06-01 10:40:09 +00:00
|
|
|
*dash = '-';
|
2021-09-04 17:00:22 +02:00
|
|
|
if (hi < lo)
|
2014-03-05 18:56:20 +01:00
|
|
|
bb_error_msg_and_die("bad range '%s'", opt_i_str);
|
2021-09-04 17:00:22 +02:00
|
|
|
hi -= lo;
|
|
|
|
if (sizeof(size_t) > sizeof(numlines)) {
|
|
|
|
if (hi >= UINT_MAX)
|
|
|
|
bb_error_msg_and_die("bad range '%s'", opt_i_str);
|
|
|
|
} else {
|
|
|
|
if (hi >= UINT_MAX / sizeof(lines[0]))
|
|
|
|
bb_error_msg_and_die("bad range '%s'", opt_i_str);
|
2014-03-05 18:56:20 +01:00
|
|
|
}
|
|
|
|
|
2021-09-04 17:00:22 +02:00
|
|
|
numlines = hi + 1;
|
|
|
|
lines = xmalloc((size_t)numlines * sizeof(lines[0]));
|
2014-03-05 18:56:20 +01:00
|
|
|
for (i = 0; i < numlines; i++) {
|
2021-09-04 17:00:22 +02:00
|
|
|
lines[i] = (char*)(uintptr_t)i;
|
2014-03-05 18:56:20 +01:00
|
|
|
}
|
|
|
|
} else {
|
|
|
|
/* default - read lines from stdin or the input file */
|
|
|
|
FILE *fp;
|
2021-08-29 14:39:01 +02:00
|
|
|
const char *fname = "-";
|
2014-03-05 18:56:20 +01:00
|
|
|
|
2021-08-29 14:39:01 +02:00
|
|
|
if (argv[0]) {
|
|
|
|
if (argv[1])
|
|
|
|
bb_show_usage();
|
|
|
|
fname = argv[0];
|
|
|
|
}
|
2014-03-05 18:56:20 +01:00
|
|
|
|
2021-08-29 14:39:01 +02:00
|
|
|
fp = xfopen_stdin(fname);
|
2014-03-05 18:56:20 +01:00
|
|
|
lines = NULL;
|
|
|
|
numlines = 0;
|
|
|
|
for (;;) {
|
|
|
|
char *line = xmalloc_fgetline(fp);
|
|
|
|
if (!line)
|
|
|
|
break;
|
|
|
|
lines = xrealloc_vector(lines, 6, numlines);
|
|
|
|
lines[numlines++] = line;
|
|
|
|
}
|
|
|
|
fclose_if_not_stdin(fp);
|
|
|
|
}
|
|
|
|
|
2021-08-07 09:41:49 +01:00
|
|
|
outlines = numlines;
|
|
|
|
if (opts & OPT_n) {
|
|
|
|
outlines = xatou(opt_n_str);
|
|
|
|
if (outlines > numlines)
|
|
|
|
outlines = numlines;
|
|
|
|
}
|
|
|
|
|
|
|
|
shuffle_lines(lines, numlines, outlines);
|
2014-03-05 18:56:20 +01:00
|
|
|
|
|
|
|
if (opts & OPT_o)
|
|
|
|
xmove_fd(xopen(opt_o_str, O_WRONLY|O_CREAT|O_TRUNC), STDOUT_FILENO);
|
|
|
|
|
|
|
|
eol = '\n';
|
|
|
|
if (opts & OPT_z)
|
|
|
|
eol = '\0';
|
|
|
|
|
2021-08-07 09:41:49 +01:00
|
|
|
for (i = numlines - outlines; i < numlines; i++) {
|
2021-09-04 17:00:22 +02:00
|
|
|
if (opts & OPT_i)
|
|
|
|
printf("%llu%c", lo + (uintptr_t)lines[i], eol);
|
|
|
|
else
|
2014-03-07 14:32:39 +01:00
|
|
|
printf("%s%c", lines[i], eol);
|
2014-03-05 18:56:20 +01:00
|
|
|
}
|
|
|
|
|
2022-01-04 23:31:58 +01:00
|
|
|
fflush_stdout_and_exit_SUCCESS();
|
2014-03-05 18:56:20 +01:00
|
|
|
}
|