uniq: support -w. closes bug 3094.

function                                             old     new   delta
packed_usage                                       24136   24132      -4
uniq_main                                            399     384     -15
This commit is contained in:
Denis Vlasenko 2008-05-03 07:21:27 +00:00
parent 687a26fe0d
commit 96b99b860c
3 changed files with 33 additions and 17 deletions

View File

@ -12,8 +12,6 @@
#include "libbb.h" #include "libbb.h"
static const char uniq_opts[] ALIGN1 = "cdu" "f:s:" "cdu\0\1\2\4";
static FILE *xgetoptfile_uniq_s(char **argv, int read0write2) static FILE *xgetoptfile_uniq_s(char **argv, int read0write2)
{ {
const char *n; const char *n;
@ -31,9 +29,11 @@ int uniq_main(int argc, char **argv) MAIN_EXTERNALLY_VISIBLE;
int uniq_main(int argc ATTRIBUTE_UNUSED, char **argv) int uniq_main(int argc ATTRIBUTE_UNUSED, char **argv)
{ {
FILE *in, *out; FILE *in, *out;
unsigned long dups, skip_fields, skip_chars, i;
const char *s0, *e0, *s1, *e1, *input_filename; const char *s0, *e0, *s1, *e1, *input_filename;
unsigned long dups;
unsigned skip_fields, skip_chars, max_chars;
unsigned opt; unsigned opt;
unsigned i;
enum { enum {
OPT_c = 0x1, OPT_c = 0x1,
@ -41,15 +41,14 @@ int uniq_main(int argc ATTRIBUTE_UNUSED, char **argv)
OPT_u = 0x4, OPT_u = 0x4,
OPT_f = 0x8, OPT_f = 0x8,
OPT_s = 0x10, OPT_s = 0x10,
OPT_w = 0x20,
}; };
skip_fields = skip_chars = 0; skip_fields = skip_chars = 0;
max_chars = -1;
opt = getopt32(argv, "cduf:s:", &s0, &s1); opt_complementary = "f+:s+:w+";
if (opt & OPT_f) opt = getopt32(argv, "cduf:s:w:", &skip_fields, &skip_chars, &max_chars);
skip_fields = xatoul(s0);
if (opt & OPT_s)
skip_chars = xatoul(s1);
argv += optind; argv += optind;
input_filename = *argv; input_filename = *argv;
@ -81,16 +80,16 @@ int uniq_main(int argc ATTRIBUTE_UNUSED, char **argv)
++e1; ++e1;
} }
if (!s0 || strcmp(e0, e1)) { if (!s0 || strncmp(e0, e1, max_chars)) {
break; break;
} }
++dups; /* Note: Testing for overflow seems excessive. */ ++dups; /* note: testing for overflow seems excessive. */
} }
if (s0) { if (s0) {
if (!(opt & (OPT_d << !!dups))) { /* (if dups, opt & OPT_e) */ if (!(opt & (OPT_d << !!dups))) { /* (if dups, opt & OPT_e) */
fprintf(out, "\0%d " + (opt & 1), dups + 1); fprintf(out, "\0%ld " + (opt & 1), dups + 1); /* 1 == OPT_c */
fprintf(out, "%s\n", s0); fprintf(out, "%s\n", s0);
} }
free((void *)s0); free((void *)s0);

View File

@ -4303,16 +4303,16 @@
) )
#define uniq_trivial_usage \ #define uniq_trivial_usage \
"[-fscdu]... [INPUT [OUTPUT]]" "[-fscduw]... [INPUT [OUTPUT]]"
#define uniq_full_usage "\n\n" \ #define uniq_full_usage "\n\n" \
"Discard all but one of successive identical lines from INPUT\n" \ "Discard duplicate lines\n" \
"(or standard input), writing to OUTPUT (or standard output)\n" \
"\nOptions:" \ "\nOptions:" \
"\n -c Prefix lines by the number of occurrences" \ "\n -c Prefix lines by the number of occurrences" \
"\n -d Only print duplicate lines" \ "\n -d Only print duplicate lines" \
"\n -u Only print unique lines" \ "\n -u Only print unique lines" \
"\n -f N Skip the first N fields" \ "\n -f N Skip first N fields" \
"\n -s N Skip the first N chars (after any skipped fields)" \ "\n -s N Skip first N chars (after any skipped fields)" \
"\n -w N Compare N characters in line" \
#define uniq_example_usage \ #define uniq_example_usage \
"$ echo -e \"a\\na\\nb\\nc\\nc\\na\" | sort | uniq\n" \ "$ echo -e \"a\\na\\nb\\nc\\nc\\na\" | sort | uniq\n" \

View File

@ -41,6 +41,7 @@ testing "uniq input - (specify stdout)" "uniq input -" \
#-c occurrences #-c occurrences
#-d dups only #-d dups only
#-u #-u
#-w max chars
# Test various command line options # Test various command line options
@ -60,6 +61,22 @@ aa bb cc9
bb cc dd8 bb cc dd8
aa bb cc9 aa bb cc9
" "
testing "uniq -w (compare max characters)" "uniq -w 2" \
"cc1
" "" \
"cc1
cc2
cc3
"
testing "uniq -s -w (skip fields and compare max chars)" \
"uniq -s 2 -w 2" \
"aaccaa
" "" \
"aaccaa
aaccbb
bbccaa
"
# -d is "Suppress the writing fo lines that are not repeated in the input." # -d is "Suppress the writing fo lines that are not repeated in the input."
# -u is "Suppress the writing of lines that are repeated in the input." # -u is "Suppress the writing of lines that are repeated in the input."