wget: more thorough sanitization of other side's data

function                                             old     new   delta
get_sanitized_hdr                                      -     156    +156
fgets_trim_sanitize                                    -     128    +128
ftpcmd                                               129     133      +4
parse_url                                            461     454      -7
sanitize_string                                       14       -     -14
wget_main                                           2431    2381     -50
fgets_and_trim                                       119       -    -119
gethdr                                               163       -    -163
------------------------------------------------------------------------------
(add/remove: 2/3 grow/shrink: 1/2 up/down: 288/-353)          Total: -65 bytes

Signed-off-by: Denys Vlasenko <vda.linux@googlemail.com>
This commit is contained in:
Denys Vlasenko 2018-02-12 16:46:13 +01:00
parent ecaec1dbec
commit 3459024bf4

View File

@ -352,15 +352,6 @@ static char *base64enc(const char *str)
} }
#endif #endif
static char* sanitize_string(char *s)
{
unsigned char *p = (void *) s;
while (*p >= ' ')
p++;
*p = '\0';
return s;
}
#if ENABLE_FEATURE_WGET_TIMEOUT #if ENABLE_FEATURE_WGET_TIMEOUT
static void alarm_handler(int sig UNUSED_PARAM) static void alarm_handler(int sig UNUSED_PARAM)
{ {
@ -423,22 +414,49 @@ static FILE *open_socket(len_and_sockaddr *lsa)
return fp; return fp;
} }
/* We balk at any control chars in other side's messages.
* This prevents nasty surprises (e.g. ESC sequences) in "Location:" URLs
* and error messages.
*
* The only exception is tabs, which are converted to (one) space:
* HTTP's "headers: <whitespace> values" may have those.
*/
static char* sanitize_string(char *s)
{
unsigned char *p = (void *) s;
while (*p) {
if (*p < ' ') {
if (*p != '\t')
break;
*p = ' ';
}
p++;
}
*p = '\0';
return s;
}
/* Returns '\n' if it was seen, else '\0'. Trims at first '\r' or '\n' */ /* Returns '\n' if it was seen, else '\0'. Trims at first '\r' or '\n' */
static char fgets_and_trim(FILE *fp, const char *fmt) static char fgets_trim_sanitize(FILE *fp, const char *fmt)
{ {
char c; char c;
char *buf_ptr; char *buf_ptr;
set_alarm(); set_alarm();
if (fgets(G.wget_buf, sizeof(G.wget_buf) - 1, fp) == NULL) if (fgets(G.wget_buf, sizeof(G.wget_buf), fp) == NULL)
bb_perror_msg_and_die("error getting response"); bb_perror_msg_and_die("error getting response");
clear_alarm(); clear_alarm();
buf_ptr = strchrnul(G.wget_buf, '\n'); buf_ptr = strchrnul(G.wget_buf, '\n');
c = *buf_ptr; c = *buf_ptr;
#if 1
/* Disallow any control chars: trim at first char < 0x20 */
sanitize_string(G.wget_buf);
#else
*buf_ptr = '\0'; *buf_ptr = '\0';
buf_ptr = strchrnul(G.wget_buf, '\r'); buf_ptr = strchrnul(G.wget_buf, '\r');
*buf_ptr = '\0'; *buf_ptr = '\0';
#endif
log_io("< %s", G.wget_buf); log_io("< %s", G.wget_buf);
@ -462,8 +480,10 @@ static int ftpcmd(const char *s1, const char *s2, FILE *fp)
log_io("> %s%s", s1, s2); log_io("> %s%s", s1, s2);
} }
/* Read until "Nxx something" is received */
G.wget_buf[3] = 0;
do { do {
fgets_and_trim(fp, "%s\n"); fgets_trim_sanitize(fp, "%s\n");
} while (!isdigit(G.wget_buf[0]) || G.wget_buf[3] != ' '); } while (!isdigit(G.wget_buf[0]) || G.wget_buf[3] != ' ');
G.wget_buf[3] = '\0'; G.wget_buf[3] = '\0';
@ -505,7 +525,7 @@ static void parse_url(const char *src_url, struct host_info *h)
h->protocol = P_HTTP; h->protocol = P_HTTP;
} else { } else {
*p = ':'; *p = ':';
bb_error_msg_and_die("not an http or ftp url: %s", sanitize_string(url)); bb_error_msg_and_die("not an http or ftp url: %s", url);
} }
} else { } else {
// GNU wget is user-friendly and falls back to http:// // GNU wget is user-friendly and falls back to http://
@ -560,13 +580,13 @@ static void parse_url(const char *src_url, struct host_info *h)
*/ */
} }
static char *gethdr(FILE *fp) static char *get_sanitized_hdr(FILE *fp)
{ {
char *s, *hdrval; char *s, *hdrval;
int c; int c;
/* retrieve header line */ /* retrieve header line */
c = fgets_and_trim(fp, " %s\n"); c = fgets_trim_sanitize(fp, " %s\n");
/* end of the headers? */ /* end of the headers? */
if (G.wget_buf[0] == '\0') if (G.wget_buf[0] == '\0')
@ -588,7 +608,7 @@ static char *gethdr(FILE *fp)
/* verify we are at the end of the header name */ /* verify we are at the end of the header name */
if (*s != ':') if (*s != ':')
bb_error_msg_and_die("bad header line: %s", sanitize_string(G.wget_buf)); bb_error_msg_and_die("bad header line: %s", G.wget_buf);
/* locate the start of the header value */ /* locate the start of the header value */
*s++ = '\0'; *s++ = '\0';
@ -755,7 +775,8 @@ static FILE* prepare_ftp_session(FILE **dfpp, struct host_info *target, len_and_
#endif #endif
if (ftpcmd(NULL, NULL, sfp) != 220) if (ftpcmd(NULL, NULL, sfp) != 220)
bb_error_msg_and_die("%s", sanitize_string(G.wget_buf + 4)); bb_error_msg_and_die("%s", G.wget_buf);
/* note: ftpcmd() sanitizes G.wget_buf, ok to print */
/* /*
* Splitting username:password pair, * Splitting username:password pair,
@ -772,7 +793,7 @@ static FILE* prepare_ftp_session(FILE **dfpp, struct host_info *target, len_and_
break; break;
/* fall through (failed login) */ /* fall through (failed login) */
default: default:
bb_error_msg_and_die("ftp login: %s", sanitize_string(G.wget_buf + 4)); bb_error_msg_and_die("ftp login: %s", G.wget_buf);
} }
ftpcmd("TYPE I", NULL, sfp); ftpcmd("TYPE I", NULL, sfp);
@ -796,7 +817,7 @@ static FILE* prepare_ftp_session(FILE **dfpp, struct host_info *target, len_and_
} else } else
if (ftpcmd("PASV", NULL, sfp) != 227) { if (ftpcmd("PASV", NULL, sfp) != 227) {
pasv_error: pasv_error:
bb_error_msg_and_die("bad response to %s: %s", "PASV", sanitize_string(G.wget_buf)); bb_error_msg_and_die("bad response to %s: %s", "PASV", G.wget_buf);
} }
port = parse_pasv_epsv(G.wget_buf); port = parse_pasv_epsv(G.wget_buf);
if (port < 0) if (port < 0)
@ -824,8 +845,11 @@ static FILE* prepare_ftp_session(FILE **dfpp, struct host_info *target, len_and_
reset_beg_range_to_zero(); reset_beg_range_to_zero();
} }
//TODO: needs ftp-escaping 0xff and '\n' bytes here.
//Or disallow '\n' altogether via sanitize_string() in parse_url().
//But 0xff's are possible in valid utf8 filenames.
if (ftpcmd("RETR ", target->path, sfp) > 150) if (ftpcmd("RETR ", target->path, sfp) > 150)
bb_error_msg_and_die("bad response to %s: %s", "RETR", sanitize_string(G.wget_buf)); bb_error_msg_and_die("bad response to %s: %s", "RETR", G.wget_buf);
return sfp; return sfp;
} }
@ -946,9 +970,9 @@ static void NOINLINE retrieve_file_data(FILE *dfp)
if (!G.chunked) if (!G.chunked)
break; break;
fgets_and_trim(dfp, NULL); /* Eat empty line */ fgets_trim_sanitize(dfp, NULL); /* Eat empty line */
get_clen: get_clen:
fgets_and_trim(dfp, NULL); fgets_trim_sanitize(dfp, NULL);
G.content_len = STRTOOFF(G.wget_buf, NULL, 16); G.content_len = STRTOOFF(G.wget_buf, NULL, 16);
/* FIXME: error check? */ /* FIXME: error check? */
if (G.content_len == 0) if (G.content_len == 0)
@ -1178,7 +1202,7 @@ static void download_one_url(const char *url)
* Retrieve HTTP response line and check for "200" status code. * Retrieve HTTP response line and check for "200" status code.
*/ */
read_response: read_response:
fgets_and_trim(sfp, " %s\n"); fgets_trim_sanitize(sfp, " %s\n");
str = G.wget_buf; str = G.wget_buf;
str = skip_non_whitespace(str); str = skip_non_whitespace(str);
@ -1189,7 +1213,7 @@ static void download_one_url(const char *url)
switch (status) { switch (status) {
case 0: case 0:
case 100: case 100:
while (gethdr(sfp) != NULL) while (get_sanitized_hdr(sfp) != NULL)
/* eat all remaining headers */; /* eat all remaining headers */;
goto read_response; goto read_response;
@ -1253,13 +1277,13 @@ However, in real world it was observed that some web servers
/* Partial Content even though we did not ask for it??? */ /* Partial Content even though we did not ask for it??? */
/* fall through */ /* fall through */
default: default:
bb_error_msg_and_die("server returned error: %s", sanitize_string(G.wget_buf)); bb_error_msg_and_die("server returned error: %s", G.wget_buf);
} }
/* /*
* Retrieve HTTP headers. * Retrieve HTTP headers.
*/ */
while ((str = gethdr(sfp)) != NULL) { while ((str = get_sanitized_hdr(sfp)) != NULL) {
static const char keywords[] ALIGN1 = static const char keywords[] ALIGN1 =
"content-length\0""transfer-encoding\0""location\0"; "content-length\0""transfer-encoding\0""location\0";
enum { enum {
@ -1267,7 +1291,7 @@ However, in real world it was observed that some web servers
}; };
smalluint key; smalluint key;
/* gethdr converted "FOO:" string to lowercase */ /* get_sanitized_hdr converted "FOO:" string to lowercase */
/* strip trailing whitespace */ /* strip trailing whitespace */
char *s = strchrnul(str, '\0') - 1; char *s = strchrnul(str, '\0') - 1;
@ -1279,14 +1303,14 @@ However, in real world it was observed that some web servers
if (key == KEY_content_length) { if (key == KEY_content_length) {
G.content_len = BB_STRTOOFF(str, NULL, 10); G.content_len = BB_STRTOOFF(str, NULL, 10);
if (G.content_len < 0 || errno) { if (G.content_len < 0 || errno) {
bb_error_msg_and_die("content-length %s is garbage", sanitize_string(str)); bb_error_msg_and_die("content-length %s is garbage", str);
} }
G.got_clen = 1; G.got_clen = 1;
continue; continue;
} }
if (key == KEY_transfer_encoding) { if (key == KEY_transfer_encoding) {
if (strcmp(str_tolower(str), "chunked") != 0) if (strcmp(str_tolower(str), "chunked") != 0)
bb_error_msg_and_die("transfer encoding '%s' is not supported", sanitize_string(str)); bb_error_msg_and_die("transfer encoding '%s' is not supported", str);
G.chunked = 1; G.chunked = 1;
} }
if (key == KEY_location && status >= 300) { if (key == KEY_location && status >= 300) {
@ -1295,7 +1319,7 @@ However, in real world it was observed that some web servers
fclose(sfp); fclose(sfp);
if (str[0] == '/') { if (str[0] == '/') {
free(redirected_path); free(redirected_path);
target.path = redirected_path = xstrdup(str+1); target.path = redirected_path = xstrdup(str + 1);
/* lsa stays the same: it's on the same server */ /* lsa stays the same: it's on the same server */
} else { } else {
parse_url(str, &target); parse_url(str, &target);
@ -1342,7 +1366,7 @@ However, in real world it was observed that some web servers
/* It's ftp. Close data connection properly */ /* It's ftp. Close data connection properly */
fclose(dfp); fclose(dfp);
if (ftpcmd(NULL, NULL, sfp) != 226) if (ftpcmd(NULL, NULL, sfp) != 226)
bb_error_msg_and_die("ftp error: %s", sanitize_string(G.wget_buf + 4)); bb_error_msg_and_die("ftp error: %s", G.wget_buf);
/* ftpcmd("QUIT", NULL, sfp); - why bother? */ /* ftpcmd("QUIT", NULL, sfp); - why bother? */
} }
fclose(sfp); fclose(sfp);