reformime: do not require \r\n

function                                             old     new   delta
parse                                                958    1063    +105
packed_usage                                       28691   28712     +21

Signed-off-by: Denys Vlasenko <vda.linux@googlemail.com>
This commit is contained in:
Denys Vlasenko 2011-05-22 03:46:33 +02:00
parent 9b90d9b503
commit d616ab6bbb
6 changed files with 331 additions and 298 deletions

View File

@ -101,7 +101,7 @@
#include "libbb.h"
#if 0
//#define dbg_error_msg(...) bb_error_msg(__VA_ARGS__)
# define dbg_error_msg(...) bb_error_msg(__VA_ARGS__)
#else
# define dbg_error_msg(...) ((void)0)
#endif

View File

@ -7,7 +7,3 @@
lib-y:=
INSERT
lib-$(CONFIG_MAKEMIME) += mime.o mail.o
lib-$(CONFIG_POPMAILDIR) += popmaildir.o mail.o
lib-$(CONFIG_REFORMIME) += mime.o mail.o
lib-$(CONFIG_SENDMAIL) += sendmail.o mail.o

View File

@ -8,35 +8,17 @@
* Licensed under GPLv2, see file LICENSE in this source tree.
*/
//usage:#define makemime_trivial_usage
//usage: "[OPTIONS] [FILE]..."
//usage:#define makemime_full_usage "\n\n"
//usage: "Create multipart MIME-encoded message from FILEs\n"
/* //usage: "Transfer encoding is base64, disposition is inline (not attachment)\n" */
//usage: "\nOptions:"
//usage: "\n -o FILE Output. Default: stdout"
//usage: "\n -a HDR Add header. Examples:"
//usage: "\n \"From: user@host.org\", \"Date: `date -R`\""
//usage: "\n -c CT Content type. Default: text/plain"
//usage: "\n -C CS Charset. Default: " CONFIG_FEATURE_MIME_CHARSET
/* //usage: "\n -e ENC Transfer encoding. Ignored. base64 is assumed" */
//usage: "\n"
//usage: "\nOther options are silently ignored"
//usage:#define reformime_trivial_usage
//usage: "[OPTIONS] [FILE]..."
//usage:#define reformime_full_usage "\n\n"
//usage: "Parse MIME-encoded message\n"
//usage: "\nOptions:"
//usage: "\n -x PREFIX Extract content of MIME sections to files"
//usage: "\n -X PROG ARGS Filter content of MIME sections through PROG"
//usage: "\n Must be the last option"
//usage: "\n"
//usage: "\nOther options are silently ignored"
//kbuild:lib-$(CONFIG_MAKEMIME) += makemime.o mail.o
#include "libbb.h"
#include "mail.h"
#if 0
# define dbg_error_msg(...) bb_error_msg(__VA_ARGS__)
#else
# define dbg_error_msg(...) ((void)0)
#endif
/*
makemime -c type [-o file] [-e encoding] [-C charset] [-N name] \
[-a "Header: Contents"] file
@ -50,7 +32,6 @@
\( opts \) - read from child process, that generates [ opts ]
Options:
-c type - create a new MIME section from "file" with this
Content-Type: (default is application/octet-stream).
-C charset - MIME charset of a new text/plain section.
@ -70,6 +51,28 @@ Options:
value on each line.
{which version of makemime is this? What do we support?}
*/
/* man makemime:
* -c TYPE: create a (non-multipart) MIME section with Content-Type: TYPE
* makemime -c TYPE [-e ENCODING] [-o OUTFILE] [-C CHARSET] [-N NAME] [-a HEADER...] FILE
* The -C option sets the MIME charset attribute for text/plain content.
* The -N option sets the name attribute for Content-Type:
* Encoding must be one of the following: 7bit, 8bit, quoted-printable, or base64.
* -m multipart/TYPE: create a multipart MIME collection with Content-Type: multipart/TYPE
* makemime -m multipart/TYPE [-e ENCODING] [-o OUTFILE] [-a HEADER...] FILE
* Type must be either "multipart/mixed", "multipart/alternative", or some other MIME multipart content type.
* Additionally, encoding can only be "7bit" or "8bit", and will default to "8bit" if not specified.
* Finally, filename must be a MIME-formatted section, NOT a regular file.
* The -m option creates an initial multipart MIME collection, that contains only one MIME section, taken from filename.
* The collection is written to standard output, or the pipe or to outputfile.
* -j FILE1: add a section to a multipart MIME collection
* makemime -j FILE1 [-o OUTFILE] FILE2
* FILE1 must be a MIME collection that was previously created by the -m option.
* FILE2 must be a MIME section that was previously created by the -c option.
* The -j option adds the MIME section in FILE2 to the MIME collection in FILE1.
*/
/* In busybox 1.15.0.svn, makemime generates output like this
@ -92,10 +95,8 @@ Content-Transfer-Encoding: base64
...file B contents...
--24269534-2145583448-1655890676--
*/
/* For reference: here is an example email to LKML which has
*
* For reference: here is an example email to LKML which has
* 1st unnamed part (so it serves as an email body)
* and one attached file:
...other headers...
@ -126,28 +127,21 @@ Content-Transfer-Encoding: 7bit
...random junk added by mailing list robots and such...
*/
/* man makemime:
//usage:#define makemime_trivial_usage
//usage: "[OPTIONS] [FILE]..."
//usage:#define makemime_full_usage "\n\n"
//usage: "Create multipart MIME-encoded message from FILEs\n"
/* //usage: "Transfer encoding is base64, disposition is inline (not attachment)\n" */
//usage: "\nOptions:"
//usage: "\n -o FILE Output. Default: stdout"
//usage: "\n -a HDR Add header. Examples:"
//usage: "\n \"From: user@host.org\", \"Date: `date -R`\""
//usage: "\n -c CT Content type. Default: text/plain"
//usage: "\n -C CS Charset. Default: " CONFIG_FEATURE_MIME_CHARSET
/* //usage: "\n -e ENC Transfer encoding. Ignored. base64 is assumed" */
//usage: "\n"
//usage: "\nOther options are silently ignored"
* -c TYPE: create a (non-multipart) MIME section with Content-Type: TYPE
* makemime -c TYPE [-e ENCODING] [-o OUTFILE] [-C CHARSET] [-N NAME] [-a HEADER...] FILE
* The -C option sets the MIME charset attribute for text/plain content.
* The -N option sets the name attribute for Content-Type:
* Encoding must be one of the following: 7bit, 8bit, quoted-printable, or base64.
* -m multipart/TYPE: create a multipart MIME collection with Content-Type: multipart/TYPE
* makemime -m multipart/TYPE [-e ENCODING] [-o OUTFILE] [-a HEADER...] FILE
* Type must be either "multipart/mixed", "multipart/alternative", or some other MIME multipart content type.
* Additionally, encoding can only be "7bit" or "8bit", and will default to "8bit" if not specified.
* Finally, filename must be a MIME-formatted section, NOT a regular file.
* The -m option creates an initial multipart MIME collection, that contains only one MIME section, taken from filename.
* The collection is written to standard output, or the pipe or to outputfile.
* -j FILE1: add a section to a multipart MIME collection
* makemime -j FILE1 [-o OUTFILE] FILE2
* FILE1 must be a MIME collection that was previously created by the -m option.
* FILE2 must be a MIME section that was previously created by the -c option.
* The -j option adds the MIME section in FILE2 to the MIME collection in FILE1.
*/
int makemime_main(int argc, char **argv) MAIN_EXTERNALLY_VISIBLE;
int makemime_main(int argc UNUSED_PARAM, char **argv)
{
@ -222,244 +216,3 @@ int makemime_main(int argc UNUSED_PARAM, char **argv)
return EXIT_SUCCESS;
#undef boundary
}
/*
 * Search a NULL-terminated token array for KEY (case-insensitive) and
 * return the token that immediately follows the first match.
 * DEFVALUE is returned when KEY is not present, or when the first match
 * is the last token (the slot after it is the terminating NULL).
 */
static const char *find_token(const char *const string_array[], const char *key, const char *defvalue)
{
	const char *const *tok;

	for (tok = string_array; *tok; tok++) {
		if (strcasecmp(*tok, key) != 0)
			continue;
		/* Only the first match counts, even if it has no value */
		return tok[1] ? tok[1] : defvalue;
	}
	return defvalue;
}
/*
 * Mandatory variant of find_token(): return the token following KEY,
 * or report "header: KEY" via bb_error_msg_and_die() when there is none
 * (that call is expected not to return).
 */
static const char *xfind_token(const char *const string_array[], const char *key)
{
	const char *value;

	value = find_token(string_array, key, NULL);
	if (value == NULL)
		bb_error_msg_and_die("header: %s", key);
	return value;
}
/* Bit masks tested against the "opts" option word */
enum {
	OPT_x = 0x001,
	OPT_X = 0x002,
#if ENABLE_FEATURE_REFORMIME_COMPAT
	/* Extra bits, present only in the compat configuration */
	OPT_d = 0x004,
	OPT_e = 0x008,
	OPT_i = 0x010,
	OPT_s = 0x020,
	OPT_r = 0x040,
	OPT_c = 0x080,
	OPT_m = 0x100,
	OPT_h = 0x200,
	OPT_o = 0x400,
	OPT_O = 0x800,
#endif
};
// Parse MIME sections read from stdin.
// Each loop iteration reads one header block, tokenizes the text starting
// at its last "Content-Type:", and then either recurses (multipart/mixed)
// or dumps the section body to a file or to a helper process.
//   boundary - multipart delimiter without the leading "--"
//   argv     - with OPT_X: command line of the helper to exec;
//              otherwise *argv is used as a prefix for the output file name
// Returns EXIT_SUCCESS, or (helper status & 0xff) + 20 if the helper
// reported a non-zero status.
// NOTE(review): "opts" is not declared in this function; presumably a
// global or macro provided elsewhere - confirm.
static int parse(const char *boundary, char **argv)
{
char *line, *s, *p;
const char *type;
int boundary_len = strlen(boundary);
const char *delims = " ;\"\t\r\n";
const char *uniq;
int ntokens;
const char *tokens[32]; // 32 is enough
// prepare unique string pattern
// ("%%llu" stays as "%llu" in the result, so uniq is itself a format
// string which is later filled in with monotonic_us())
uniq = xasprintf("%%llu.%u.%s", (unsigned)getpid(), safe_gethostname());
//bb_info_msg("PARSE[%s]", uniq);
// presumably reads input up to the "\r\n\r\n" terminator, i.e. one
// CRLF-delimited header block per iteration - confirm against libbb
while ((line = xmalloc_fgets_str(stdin, "\r\n\r\n")) != NULL) {
// seek to start of MIME section
// N.B. to avoid false positives let us seek to the _last_ occurrence
p = NULL;
s = line;
while ((s = strcasestr(s, "Content-Type:")) != NULL)
p = s++;
// no Content-Type: in this chunk - nothing to process
if (!p)
goto next;
//bb_info_msg("L[%s]", p);
// split to tokens
// TODO: strip of comments which are of form: (comment-text)
ntokens = 0;
tokens[ntokens] = NULL;
for (s = strtok(p, delims); s; s = strtok(NULL, delims)) {
tokens[ntokens] = s;
// once the array is full, further tokens overwrite the last used slot
if (ntokens < ARRAY_SIZE(tokens) - 1)
ntokens++;
//bb_info_msg("L[%d][%s]", ntokens, s);
}
// NULL-terminate the list; note that s is NULL from here on
tokens[ntokens] = NULL;
//bb_info_msg("N[%d]", ntokens);
// analyse tokens
type = find_token(tokens, "Content-Type:", "text/plain");
//bb_info_msg("T[%s]", type);
if (0 == strncasecmp(type, "multipart/", 10)) {
// only multipart/mixed is handled: recurse with the nested boundary
// (the return value of the nested parse() is not examined)
if (0 == strcasecmp(type+10, "mixed")) {
parse(xfind_token(tokens, "boundary="), argv);
} else
bb_error_msg_and_die("no support of content type '%s'", type);
} else {
// self-initialization: presumably to silence a "may be used
// uninitialized" warning; pid is only set and read under OPT_X
pid_t pid = pid;
int rc;
FILE *fp;
// fetch charset
const char *charset = find_token(tokens, "charset=", CONFIG_FEATURE_MIME_CHARSET);
// fetch encoding
const char *encoding = find_token(tokens, "Content-Transfer-Encoding:", "7bit");
// compose target filename
char *filename = (char *)find_token(tokens, "filename=", NULL);
// either way filename ends up referring to allocated memory,
// which is released in "housekeeping" below
if (!filename)
filename = xasprintf(uniq, monotonic_us());
else
filename = bb_get_last_path_component_strip(xstrdup(filename));
// start external helper, if any
if (opts & OPT_X) {
int fd[2];
xpipe(fd);
pid = vfork();
if (0 == pid) {
// child reads from fd[0]
close(fd[1]);
xmove_fd(fd[0], STDIN_FILENO);
// section properties are handed to the helper via environment
xsetenv("CONTENT_TYPE", type);
xsetenv("CHARSET", charset);
xsetenv("ENCODING", encoding);
xsetenv("FILENAME", filename);
BB_EXECVP_or_die(argv);
}
// parent dumps to fd[1]
close(fd[0]);
fp = xfdopen_for_write(fd[1]);
signal(SIGPIPE, SIG_IGN); // ignore EPIPE
// or create a file for dump
} else {
// output file name is <*argv><filename>
char *fname = xasprintf("%s%s", *argv, filename);
fp = xfopen_for_write(fname);
free(fname);
}
// housekeeping
free(filename);
// dump to fp
if (0 == strcasecmp(encoding, "base64")) {
// NOTE(review): '-' is presumably the character at which decoding
// stops (start of a boundary line) - confirm against read_base64()
read_base64(stdin, fp, '-');
} else if (0 != strcasecmp(encoding, "7bit")
&& 0 != strcasecmp(encoding, "8bit")
) {
// quoted-printable, binary, user-defined are unsupported so far
bb_error_msg_and_die("no support of encoding '%s'", encoding);
} else {
// N.B. we have written redundant \n. so truncate the file
// The following weird 2-tacts reading technique is due to
// we have to not write extra \n at the end of the file
// In case of -x option we could truncate the resulting file as
// fseek(fp, -1, SEEK_END);
// if (ftruncate(fileno(fp), ftell(fp)))
// bb_perror_msg("ftruncate");
// But in case of -X we have to be much more careful. There is
// no means to truncate what we already have sent to the helper.
// p holds the previously read chunk, s the current one; p is written
// only after s turned out not to be a boundary line, so the chunk
// read just before the boundary (or before EOF) is never written
p = xmalloc_fgets_str(stdin, "\r\n");
while (p) {
s = xmalloc_fgets_str(stdin, "\r\n");
if (s == NULL)
break;
// boundary line: "--" followed by the boundary string
if ('-' == s[0]
&& '-' == s[1]
&& 0 == strncmp(s+2, boundary, boundary_len)
) {
break;
}
fputs(p, fp);
p = s;
}
/*
while ((s = xmalloc_fgetline_str(stdin, "\r\n")) != NULL) {
if ('-' == s[0] && '-' == s[1]
&& 0 == strncmp(s+2, boundary, boundary_len))
break;
fprintf(fp, "%s\n", s);
}
// N.B. we have written redundant \n. so truncate the file
fseek(fp, -1, SEEK_END);
if (ftruncate(fileno(fp), ftell(fp)))
bb_perror_msg("ftruncate");
*/
}
fclose(fp);
// finalize helper
if (opts & OPT_X) {
signal(SIGPIPE, SIG_DFL);
// exit if helper exited >0
rc = (wait4pid(pid) & 0xff);
if (rc)
return rc+20;
}
// check multipart finalized
// (s is non-NULL here only when the 7bit/8bit loop stopped on a
// boundary line; "--" right after the boundary text ends the multipart)
if (s && '-' == s[2+boundary_len] && '-' == s[2+boundary_len+1]) {
free(line);
break;
}
}
next:
free(line);
}
//bb_info_msg("ENDPARSE[%s]", boundary);
return EXIT_SUCCESS;
}
/*
Usage: reformime [options]
-d - parse a delivery status notification.
-e - extract contents of MIME section.
-x - extract MIME section to a file.
-X - pipe MIME section to a program.
-i - show MIME info.
-s n.n.n.n - specify MIME section.
-r - rewrite message, filling in missing MIME headers.
-r7 - also convert 8bit/raw encoding to quoted-printable, if possible.
-r8 - also convert quoted-printable encoding to 8bit, if possible.
-c charset - default charset for rewriting, -o, and -O.
-m [file] [file]... - create a MIME message digest.
-h "header" - decode RFC 2047-encoded header.
-o "header" - encode unstructured header using RFC 2047.
-O "header" - encode address list header using RFC 2047.
*/
int reformime_main(int argc, char **argv) MAIN_EXTERNALLY_VISIBLE;
/*
 * Applet entry point: decode the command line, then hand stdin
 * processing over to parse() with an empty top-level boundary.
 */
int reformime_main(int argc UNUSED_PARAM, char **argv)
{
	const char *opt_prefix = "";
	char **target;

	INIT_G();

	/* Option parsing. N.B. only -x and -X are supported so far */
	opt_complementary = "x--X:X--x" IF_FEATURE_REFORMIME_COMPAT(":m::");
	opts = getopt32(argv,
		"x:X" IF_FEATURE_REFORMIME_COMPAT("deis:r:c:m:h:o:O:"),
		&opt_prefix
		IF_FEATURE_REFORMIME_COMPAT(, NULL, NULL, &G.opt_charset, NULL, NULL, NULL, NULL)
	);
	argv += optind;

	/* With -X parse() receives the remaining arguments;
	 * otherwise it receives a pointer to opt_prefix ("" by default) */
	if (opts & OPT_X)
		target = argv;
	else
		target = (char **)&opt_prefix;

	return parse("", target);
}

View File

@ -10,6 +10,8 @@
* Licensed under GPLv2, see file LICENSE in this source tree.
*/
//kbuild:lib-$(CONFIG_POPMAILDIR) += popmaildir.o mail.o
//usage:#define popmaildir_trivial_usage
//usage: "[OPTIONS] MAILDIR [CONN_HELPER ARGS]"
//usage:#define popmaildir_full_usage "\n\n"

280
mailutils/reformime.c Normal file
View File

@ -0,0 +1,280 @@
/* vi: set sw=4 ts=4: */
/*
* makemime: create MIME-encoded message
* reformime: parse MIME-encoded message
*
* Copyright (C) 2008 by Vladimir Dronnikov <dronnikov@gmail.com>
*
* Licensed under GPLv2, see file LICENSE in this source tree.
*/
//kbuild:lib-$(CONFIG_REFORMIME) += reformime.o mail.o
#include "libbb.h"
#include "mail.h"
#if 0
# define dbg_error_msg(...) bb_error_msg(__VA_ARGS__)
#else
# define dbg_error_msg(...) ((void)0)
#endif
/*
 * Search a NULL-terminated token array for KEY (case-insensitive) and
 * return the token that immediately follows the first match.
 * DEFVALUE is returned when KEY is not present, or when the first match
 * is the last token (the slot after it is the terminating NULL).
 */
static const char *find_token(const char *const string_array[], const char *key, const char *defvalue)
{
	const char *const *tok;

	for (tok = string_array; *tok; tok++) {
		if (strcasecmp(*tok, key) != 0)
			continue;
		/* Only the first match counts, even if it has no value */
		return tok[1] ? tok[1] : defvalue;
	}
	return defvalue;
}
/*
 * Mandatory variant of find_token(): return the token following KEY,
 * or report "not found: 'KEY'" via bb_error_msg_and_die() when there is
 * none (that call is expected not to return).
 */
static const char *xfind_token(const char *const string_array[], const char *key)
{
	const char *value;

	value = find_token(string_array, key, NULL);
	if (value == NULL)
		bb_error_msg_and_die("not found: '%s'", key);
	return value;
}
/* Bit masks tested against the "opts" option word */
enum {
	OPT_x = 0x001,
	OPT_X = 0x002,
#if ENABLE_FEATURE_REFORMIME_COMPAT
	/* Extra bits, present only in the compat configuration */
	OPT_d = 0x004,
	OPT_e = 0x008,
	OPT_i = 0x010,
	OPT_s = 0x020,
	OPT_r = 0x040,
	OPT_c = 0x080,
	OPT_m = 0x100,
	OPT_h = 0x200,
	OPT_o = 0x400,
	OPT_O = 0x800,
#endif
};
/*
 * Parse MIME sections read from stdin.
 *
 * Each iteration reads one header block (terminated by an empty line;
 * '\r' characters are dropped, so both CRLF and LF input is accepted),
 * tokenizes it starting at its last "Content-Type:" line, and then
 * either recurses into a multipart/mixed body or copies the section
 * body to a file or to a helper process.
 *
 *   boundary - multipart delimiter without the leading "--"
 *   argv     - with OPT_X: command line of the helper to exec;
 *              otherwise *argv is a prefix for the output file name
 *
 * Returns EXIT_SUCCESS, or (helper status & 0xff) + 20 as soon as
 * a helper reports a non-zero status.
 */
static int parse(const char *boundary, char **argv)
{
	int boundary_len = strlen(boundary);
	char uniq[sizeof("%%llu.%u") + sizeof(int)*3];

	dbg_error_msg("BOUNDARY[%s]", boundary);

	// prepare unique string pattern
	// ("%%llu" stays as "%llu": uniq is itself a format string
	// which is filled in with monotonic_us() below)
	sprintf(uniq, "%%llu.%u", (unsigned)getpid());
	dbg_error_msg("UNIQ[%s]", uniq);

	while (1) {
		char *header;
		const char *tokens[32]; /* 32 is enough */
		const char *type;

		/* Read the header (everything up to two \n) */
		{
			unsigned header_idx = 0;
			int last_ch = 0;

			header = NULL;
			while (1) {
				int ch = fgetc(stdin);
				if (ch == '\r') /* Support both line endings */
					continue;
				if (ch == EOF)
					break;
				if (ch == '\n' && last_ch == ch)
					break;
				/* Grow in 256-byte steps, keeping room for the final NUL */
				if (!(header_idx & 0xff))
					header = xrealloc(header, header_idx + 0x101);
				header[header_idx++] = last_ch = ch;
			}
			if (!header) {
				dbg_error_msg("EOF");
				break;
			}
			header[header_idx] = '\0';
			dbg_error_msg("H:'%s'", header);
		}

		/* Split to tokens */
		{
			char *s, *p;
			unsigned ntokens;
			const char *delims = ";=\" \t\n";

			/* Skip to last Content-Type: */
			s = p = header;
			while ((p = strchr(p, '\n')) != NULL) {
				p++;
				if (strncasecmp(p, "Content-Type:", sizeof("Content-Type:")-1) == 0)
					s = p;
			}
			dbg_error_msg("L:'%s'", s);
			ntokens = 0;
			s = strtok(s, delims);
			while (s) {
				tokens[ntokens] = s;
				/* When the array is full, later tokens overwrite the last slot */
				if (ntokens < ARRAY_SIZE(tokens) - 1)
					ntokens++;
				dbg_error_msg("L[%u]='%s'", ntokens, s);
				s = strtok(NULL, delims);
			}
			tokens[ntokens] = NULL;
			dbg_error_msg("EMPTYLINE, ntokens:%u", ntokens);
			if (ntokens == 0) {
				free(header);
				break;
			}
		}

		/* Is it multipart? */
		type = find_token(tokens, "Content-Type:", "text/plain");
		dbg_error_msg("TYPE:'%s'", type);
		if (0 == strncasecmp(type, "multipart/", 10)) {
			/* Yes, recurse */
			if (strcasecmp(type + 10, "mixed") != 0)
				bb_error_msg_and_die("no support of content type '%s'", type);
			/* The boundary token points into header, which stays
			 * allocated until the nested parse() returns.
			 * NOTE(review): the nested return value (helper failure
			 * code) is not examined here - confirm this is intended */
			parse(xfind_token(tokens, "boundary"), argv);
		} else {
			/* No, process one non-multipart section */
			char *end;
			/* Self-initialization: presumably silences a "may be used
			 * uninitialized" warning; pid is set and read only under OPT_X */
			pid_t pid = pid;
			FILE *fp;

			const char *charset = find_token(tokens, "charset", CONFIG_FEATURE_MIME_CHARSET);
			const char *encoding = find_token(tokens, "Content-Transfer-Encoding:", "7bit");

			/* Compose target filename */
			char *filename = (char *)find_token(tokens, "filename", NULL);
			if (!filename)
				filename = xasprintf(uniq, monotonic_us());
			else
				filename = bb_get_last_path_component_strip(xstrdup(filename));

			if (opts & OPT_X) {
				int fd[2];

				/* start external helper */
				xpipe(fd);
				pid = vfork();
				if (0 == pid) {
					/* child reads from fd[0] */
					close(fd[1]);
					xmove_fd(fd[0], STDIN_FILENO);
					/* section properties are passed via environment */
					xsetenv("CONTENT_TYPE", type);
					xsetenv("CHARSET", charset);
					xsetenv("ENCODING", encoding);
					xsetenv("FILENAME", filename);
					BB_EXECVP_or_die(argv);
				}
				/* parent will write to fd[1] */
				close(fd[0]);
				fp = xfdopen_for_write(fd[1]);
				signal(SIGPIPE, SIG_IGN);
			} else {
				/* write to file */
				char *fname = xasprintf("%s%s", *argv, filename);
				fp = xfopen_for_write(fname);
				free(fname);
			}
			free(filename);

			/* write to fp */
			end = NULL;
			if (0 == strcasecmp(encoding, "base64")) {
				/* NOTE(review): '-' is presumably the character at which
				 * decoding stops - confirm against read_base64() */
				read_base64(stdin, fp, '-');
			} else
			if (0 != strcasecmp(encoding, "7bit")
			 && 0 != strcasecmp(encoding, "8bit")
			) {
				/* quoted-printable, binary, user-defined are unsupported so far */
				bb_error_msg_and_die("encoding '%s' not supported", encoding);
			} else {
				/* plain 7bit or 8bit */
				while ((end = xmalloc_fgets(stdin)) != NULL) {
					/* boundary line: "--" followed by the boundary text */
					if ('-' == end[0]
					 && '-' == end[1]
					 && strncmp(end + 2, boundary, boundary_len) == 0
					) {
						break;
					}
					fputs(end, fp);
				}
			}
			fclose(fp);

			/* Wait for child */
			if (opts & OPT_X) {
				int rc;

				signal(SIGPIPE, SIG_DFL);
				rc = (wait4pid(pid) & 0xff);
				if (rc != 0) {
					/* do not leak buffers on the error path */
					free(end);
					free(header);
					return rc + 20;
				}
			}

			/* Multipart ended? */
			/* (end is non-NULL only when the loop above stopped on
			 * a boundary line, so these indexes are within the string) */
			if (end && '-' == end[2 + boundary_len] && '-' == end[2 + boundary_len + 1]) {
				dbg_error_msg("FINISHED MPART:'%s'", end);
				free(end);
				free(header);
				break;
			}
			dbg_error_msg("FINISHED:'%s'", end);
			free(end);
		} /* end of "handle one non-multipart block" */

		free(header);
	} /* while (1) */

	dbg_error_msg("ENDPARSE[%s]", boundary);

	return EXIT_SUCCESS;
}
//usage:#define reformime_trivial_usage
//usage: "[OPTIONS]"
//usage:#define reformime_full_usage "\n\n"
//usage: "Parse MIME-encoded message on stdin\n"
//usage: "\nOptions:"
//usage: "\n -x PREFIX Extract content of MIME sections to files"
//usage: "\n -X PROG ARGS Filter content of MIME sections through PROG"
//usage: "\n Must be the last option"
//usage: "\n"
//usage: "\nOther options are silently ignored"
/*
Usage: reformime [options]
-d - parse a delivery status notification.
-e - extract contents of MIME section.
-x - extract MIME section to a file.
-X - pipe MIME section to a program.
-i - show MIME info.
-s n.n.n.n - specify MIME section.
-r - rewrite message, filling in missing MIME headers.
-r7 - also convert 8bit/raw encoding to quoted-printable, if possible.
-r8 - also convert quoted-printable encoding to 8bit, if possible.
-c charset - default charset for rewriting, -o, and -O.
-m [file] [file]... - create a MIME message digest.
-h "header" - decode RFC 2047-encoded header.
-o "header" - encode unstructured header using RFC 2047.
-O "header" - encode address list header using RFC 2047.
*/
int reformime_main(int argc, char **argv) MAIN_EXTERNALLY_VISIBLE;
/*
 * Applet entry point: decode the command line, then hand stdin
 * processing over to parse() with an empty top-level boundary.
 */
int reformime_main(int argc UNUSED_PARAM, char **argv)
{
	const char *opt_prefix = "";
	char **target;

	INIT_G();

	/* Option parsing. N.B. only -x and -X are supported so far */
	opt_complementary = "x--X:X--x" IF_FEATURE_REFORMIME_COMPAT(":m::");
	opts = getopt32(argv,
		"x:X" IF_FEATURE_REFORMIME_COMPAT("deis:r:c:m:h:o:O:"),
		&opt_prefix
		IF_FEATURE_REFORMIME_COMPAT(, NULL, NULL, &G.opt_charset, NULL, NULL, NULL, NULL)
	);
	argv += optind;

	/* With -X parse() receives the remaining arguments;
	 * otherwise it receives a pointer to opt_prefix ("" by default) */
	if (opts & OPT_X)
		target = argv;
	else
		target = (char **)&opt_prefix;

	return parse("", target);
}

View File

@ -7,6 +7,8 @@
* Licensed under GPLv2, see file LICENSE in this source tree.
*/
//kbuild:lib-$(CONFIG_SENDMAIL) += sendmail.o mail.o
//usage:#define sendmail_trivial_usage
//usage: "[OPTIONS] [RECIPIENT_EMAIL]..."
//usage:#define sendmail_full_usage "\n\n"