preparatory cleanups for seamless uncompression improvements

unpack_gz_stream_with_info: fix buggy error check
man: fix possible accesses past the end of a string
move seamless uncompression helpers from read_printf.c to open_transformer.c

function                                             old     new   delta
show_manpage                                         153     212     +59
unpack_gz_stream_with_info                           520     539     +19

Signed-off-by: Denys Vlasenko <vda.linux@googlemail.com>
This commit is contained in:
Denys Vlasenko 2012-03-06 16:23:50 +01:00
parent 17eedcad94
commit 59655077c5
6 changed files with 187 additions and 179 deletions

View File

@ -73,7 +73,7 @@
*/ */
IF_DESKTOP(long long) int FAST_FUNC IF_DESKTOP(long long) int FAST_FUNC
unpack_Z_stream(int fd_in, int fd_out) unpack_Z_stream(int src_fd, int dst_fd)
{ {
IF_DESKTOP(long long total_written = 0;) IF_DESKTOP(long long total_written = 0;)
IF_DESKTOP(long long) int retval = -1; IF_DESKTOP(long long) int retval = -1;
@ -105,14 +105,14 @@ unpack_Z_stream(int fd_in, int fd_out)
inbuf = xzalloc(IBUFSIZ + 64); inbuf = xzalloc(IBUFSIZ + 64);
outbuf = xzalloc(OBUFSIZ + 2048); outbuf = xzalloc(OBUFSIZ + 2048);
htab = xzalloc(HSIZE); /* wsn't zeroed out before, maybe can xmalloc? */ htab = xzalloc(HSIZE); /* wasn't zeroed out before, maybe can xmalloc? */
codetab = xzalloc(HSIZE * sizeof(codetab[0])); codetab = xzalloc(HSIZE * sizeof(codetab[0]));
insize = 0; insize = 0;
/* xread isn't good here, we have to return - caller may want /* xread isn't good here, we have to return - caller may want
* to do some cleanup (e.g. delete incomplete unpacked file etc) */ * to do some cleanup (e.g. delete incomplete unpacked file etc) */
if (full_read(fd_in, inbuf, 1) != 1) { if (full_read(src_fd, inbuf, 1) != 1) {
bb_error_msg("short read"); bb_error_msg("short read");
goto err; goto err;
} }
@ -162,7 +162,7 @@ unpack_Z_stream(int fd_in, int fd_out)
} }
if (insize < (int) (IBUFSIZ + 64) - IBUFSIZ) { if (insize < (int) (IBUFSIZ + 64) - IBUFSIZ) {
rsize = safe_read(fd_in, inbuf + insize, IBUFSIZ); rsize = safe_read(src_fd, inbuf + insize, IBUFSIZ);
if (rsize < 0) if (rsize < 0)
bb_error_msg_and_die(bb_msg_read_error); bb_error_msg_and_die(bb_msg_read_error);
insize += rsize; insize += rsize;
@ -268,7 +268,7 @@ unpack_Z_stream(int fd_in, int fd_out)
} }
if (outpos >= OBUFSIZ) { if (outpos >= OBUFSIZ) {
xwrite(fd_out, outbuf, outpos); xwrite(dst_fd, outbuf, outpos);
IF_DESKTOP(total_written += outpos;) IF_DESKTOP(total_written += outpos;)
outpos = 0; outpos = 0;
} }
@ -296,7 +296,7 @@ unpack_Z_stream(int fd_in, int fd_out)
} while (rsize > 0); } while (rsize > 0);
if (outpos > 0) { if (outpos > 0) {
xwrite(fd_out, outbuf, outpos); xwrite(dst_fd, outbuf, outpos);
IF_DESKTOP(total_written += outpos;) IF_DESKTOP(total_written += outpos;)
} }

View File

@ -1182,33 +1182,37 @@ static int check_header_gzip(STATE_PARAM unpack_info_t *info)
} }
IF_DESKTOP(long long) int FAST_FUNC IF_DESKTOP(long long) int FAST_FUNC
unpack_gz_stream_with_info(int in, int out, unpack_info_t *info) unpack_gz_stream_with_info(int src_fd, int dst_fd, unpack_info_t *info)
{ {
uint32_t v32; uint32_t v32;
IF_DESKTOP(long long) int n; IF_DESKTOP(long long) int total, n;
DECLARE_STATE; DECLARE_STATE;
n = 0; total = 0;
ALLOC_STATE; ALLOC_STATE;
to_read = -1; to_read = -1;
// bytebuffer_max = 0x8000; // bytebuffer_max = 0x8000;
bytebuffer = xmalloc(bytebuffer_max); bytebuffer = xmalloc(bytebuffer_max);
gunzip_src_fd = in; gunzip_src_fd = src_fd;
again: again:
if (!check_header_gzip(PASS_STATE info)) { if (!check_header_gzip(PASS_STATE info)) {
bb_error_msg("corrupted data"); bb_error_msg("corrupted data");
n = -1; total = -1;
goto ret; goto ret;
} }
n += inflate_unzip_internal(PASS_STATE in, out);
if (n < 0) n = inflate_unzip_internal(PASS_STATE src_fd, dst_fd);
if (n < 0) {
total = -1;
goto ret; goto ret;
}
total += n;
if (!top_up(PASS_STATE 8)) { if (!top_up(PASS_STATE 8)) {
bb_error_msg("corrupted data"); bb_error_msg("corrupted data");
n = -1; total = -1;
goto ret; goto ret;
} }
@ -1216,7 +1220,7 @@ unpack_gz_stream_with_info(int in, int out, unpack_info_t *info)
v32 = buffer_read_le_u32(PASS_STATE_ONLY); v32 = buffer_read_le_u32(PASS_STATE_ONLY);
if ((~gunzip_crc) != v32) { if ((~gunzip_crc) != v32) {
bb_error_msg("crc error"); bb_error_msg("crc error");
n = -1; total = -1;
goto ret; goto ret;
} }
@ -1224,7 +1228,7 @@ unpack_gz_stream_with_info(int in, int out, unpack_info_t *info)
v32 = buffer_read_le_u32(PASS_STATE_ONLY); v32 = buffer_read_le_u32(PASS_STATE_ONLY);
if ((uint32_t)gunzip_bytes_out != v32) { if ((uint32_t)gunzip_bytes_out != v32) {
bb_error_msg("incorrect length"); bb_error_msg("incorrect length");
n = -1; total = -1;
} }
if (!top_up(PASS_STATE 2)) if (!top_up(PASS_STATE 2))
@ -1242,7 +1246,7 @@ unpack_gz_stream_with_info(int in, int out, unpack_info_t *info)
ret: ret:
free(bytebuffer); free(bytebuffer);
DEALLOC_STATE; DEALLOC_STATE;
return n; return total;
} }
IF_DESKTOP(long long) int FAST_FUNC IF_DESKTOP(long long) int FAST_FUNC

View File

@ -6,6 +6,16 @@
#include "libbb.h" #include "libbb.h"
#include "bb_archive.h" #include "bb_archive.h"
#define ZIPPED (ENABLE_FEATURE_SEAMLESS_LZMA \
|| ENABLE_FEATURE_SEAMLESS_BZ2 \
|| ENABLE_FEATURE_SEAMLESS_GZ \
/* || ENABLE_FEATURE_SEAMLESS_Z */ \
)
#if ZIPPED
# include "bb_archive.h"
#endif
/* transformer(), more than meets the eye */ /* transformer(), more than meets the eye */
/* /*
* On MMU machine, the transform_prog is removed by macro magic * On MMU machine, the transform_prog is removed by macro magic
@ -52,3 +62,133 @@ void FAST_FUNC open_transformer(int fd,
close(fd_pipe.wr); /* don't want to write to the child */ close(fd_pipe.wr); /* don't want to write to the child */
xmove_fd(fd_pipe.rd, fd); xmove_fd(fd_pipe.rd, fd);
} }
/* Used by e.g. rpm which gives us a fd without filename,
* thus we can't guess the format from filename's extension.
*/
#if ZIPPED
void FAST_FUNC setup_unzip_on_fd(int fd /*, int fail_if_not_detected*/)
{
const int fail_if_not_detected = 1;
union {
uint8_t b[4];
uint16_t b16[2];
uint32_t b32[1];
} magic;
int offset = -2;
# if BB_MMU
IF_DESKTOP(long long) int FAST_FUNC (*xformer)(int src_fd, int dst_fd);
enum { xformer_prog = 0 };
# else
enum { xformer = 0 };
const char *xformer_prog;
# endif
/* .gz and .bz2 both have 2-byte signature, and their
* unpack_XXX_stream wants this header skipped. */
xread(fd, magic.b16, sizeof(magic.b16[0]));
if (ENABLE_FEATURE_SEAMLESS_GZ
&& magic.b16[0] == GZIP_MAGIC
) {
# if BB_MMU
xformer = unpack_gz_stream;
# else
xformer_prog = "gunzip";
# endif
goto found_magic;
}
if (ENABLE_FEATURE_SEAMLESS_BZ2
&& magic.b16[0] == BZIP2_MAGIC
) {
# if BB_MMU
xformer = unpack_bz2_stream;
# else
xformer_prog = "bunzip2";
# endif
goto found_magic;
}
if (ENABLE_FEATURE_SEAMLESS_XZ
&& magic.b16[0] == XZ_MAGIC1
) {
offset = -6;
xread(fd, magic.b32, sizeof(magic.b32[0]));
if (magic.b32[0] == XZ_MAGIC2) {
# if BB_MMU
xformer = unpack_xz_stream;
/* unpack_xz_stream wants fd at position 6, no need to seek */
//xlseek(fd, offset, SEEK_CUR);
# else
xformer_prog = "unxz";
# endif
goto found_magic;
}
}
/* No known magic seen */
if (fail_if_not_detected)
bb_error_msg_and_die("no gzip"
IF_FEATURE_SEAMLESS_BZ2("/bzip2")
IF_FEATURE_SEAMLESS_XZ("/xz")
" magic");
xlseek(fd, offset, SEEK_CUR);
return;
found_magic:
# if !BB_MMU
/* NOMMU version of open_transformer execs
* an external unzipper that wants
* file position at the start of the file */
xlseek(fd, offset, SEEK_CUR);
# endif
open_transformer(fd, xformer, xformer_prog);
}
#endif /* ZIPPED */
int FAST_FUNC open_zipped(const char *fname)
{
#if !ZIPPED
return open(fname, O_RDONLY);
#else
char *sfx;
int fd;
fd = open(fname, O_RDONLY);
if (fd < 0)
return fd;
sfx = strrchr(fname, '.');
if (sfx) {
sfx++;
if (ENABLE_FEATURE_SEAMLESS_LZMA && strcmp(sfx, "lzma") == 0)
/* .lzma has no header/signature, just trust it */
open_transformer(fd, unpack_lzma_stream, "unlzma");
else
if ((ENABLE_FEATURE_SEAMLESS_GZ && strcmp(sfx, "gz") == 0)
|| (ENABLE_FEATURE_SEAMLESS_BZ2 && strcmp(sfx, "bz2") == 0)
|| (ENABLE_FEATURE_SEAMLESS_XZ && strcmp(sfx, "xz") == 0)
) {
setup_unzip_on_fd(fd /*, fail_if_not_detected: 1*/);
}
}
return fd;
#endif
}
void* FAST_FUNC xmalloc_open_zipped_read_close(const char *fname, size_t *maxsz_p)
{
int fd;
char *image;
fd = open_zipped(fname);
if (fd < 0)
return NULL;
image = xmalloc_read(fd, maxsz_p);
if (!image)
bb_perror_msg("read error from '%s'", fname);
close(fd);
return image;
}

View File

@ -713,6 +713,14 @@ extern void *xmalloc_read(int fd, size_t *maxsz_p) FAST_FUNC RETURNS_MALLOC;
extern void *xmalloc_open_read_close(const char *filename, size_t *maxsz_p) FAST_FUNC RETURNS_MALLOC; extern void *xmalloc_open_read_close(const char *filename, size_t *maxsz_p) FAST_FUNC RETURNS_MALLOC;
/* Never returns NULL */ /* Never returns NULL */
extern void *xmalloc_xopen_read_close(const char *filename, size_t *maxsz_p) FAST_FUNC RETURNS_MALLOC; extern void *xmalloc_xopen_read_close(const char *filename, size_t *maxsz_p) FAST_FUNC RETURNS_MALLOC;
#define SEAMLESS_COMPRESSION (0 \
|| ENABLE_FEATURE_SEAMLESS_XZ \
|| ENABLE_FEATURE_SEAMLESS_LZMA \
|| ENABLE_FEATURE_SEAMLESS_BZ2 \
|| ENABLE_FEATURE_SEAMLESS_GZ \
|| ENABLE_FEATURE_SEAMLESS_Z)
/* Autodetects gzip/bzip2 formats. fd may be in the middle of the file! */ /* Autodetects gzip/bzip2 formats. fd may be in the middle of the file! */
#if ENABLE_FEATURE_SEAMLESS_LZMA \ #if ENABLE_FEATURE_SEAMLESS_LZMA \
|| ENABLE_FEATURE_SEAMLESS_BZ2 \ || ENABLE_FEATURE_SEAMLESS_BZ2 \

View File

@ -8,16 +8,6 @@
*/ */
#include "libbb.h" #include "libbb.h"
#define ZIPPED (ENABLE_FEATURE_SEAMLESS_LZMA \
|| ENABLE_FEATURE_SEAMLESS_BZ2 \
|| ENABLE_FEATURE_SEAMLESS_GZ \
/* || ENABLE_FEATURE_SEAMLESS_Z */ \
)
#if ZIPPED
# include "bb_archive.h"
#endif
/* Suppose that you are a shell. You start child processes. /* Suppose that you are a shell. You start child processes.
* They work and eventually exit. You want to get user input. * They work and eventually exit. You want to get user input.
@ -244,132 +234,3 @@ void* FAST_FUNC xmalloc_xopen_read_close(const char *filename, size_t *maxsz_p)
bb_perror_msg_and_die("can't read '%s'", filename); bb_perror_msg_and_die("can't read '%s'", filename);
return buf; return buf;
} }
/* Used by e.g. rpm which gives us a fd without filename,
* thus we can't guess the format from filename's extension.
*/
#if ZIPPED
void FAST_FUNC setup_unzip_on_fd(int fd /*, int fail_if_not_detected*/)
{
const int fail_if_not_detected = 1;
union {
uint8_t b[4];
uint16_t b16[2];
uint32_t b32[1];
} magic;
int offset = -2;
# if BB_MMU
IF_DESKTOP(long long) int FAST_FUNC (*xformer)(int src_fd, int dst_fd);
enum { xformer_prog = 0 };
# else
enum { xformer = 0 };
const char *xformer_prog;
# endif
/* .gz and .bz2 both have 2-byte signature, and their
* unpack_XXX_stream wants this header skipped. */
xread(fd, magic.b16, sizeof(magic.b16[0]));
if (ENABLE_FEATURE_SEAMLESS_GZ
&& magic.b16[0] == GZIP_MAGIC
) {
# if BB_MMU
xformer = unpack_gz_stream;
# else
xformer_prog = "gunzip";
# endif
goto found_magic;
}
if (ENABLE_FEATURE_SEAMLESS_BZ2
&& magic.b16[0] == BZIP2_MAGIC
) {
# if BB_MMU
xformer = unpack_bz2_stream;
# else
xformer_prog = "bunzip2";
# endif
goto found_magic;
}
if (ENABLE_FEATURE_SEAMLESS_XZ
&& magic.b16[0] == XZ_MAGIC1
) {
offset = -6;
xread(fd, magic.b32, sizeof(magic.b32[0]));
if (magic.b32[0] == XZ_MAGIC2) {
# if BB_MMU
xformer = unpack_xz_stream;
/* unpack_xz_stream wants fd at position 6, no need to seek */
//xlseek(fd, offset, SEEK_CUR);
# else
xformer_prog = "unxz";
# endif
goto found_magic;
}
}
/* No known magic seen */
if (fail_if_not_detected)
bb_error_msg_and_die("no gzip"
IF_FEATURE_SEAMLESS_BZ2("/bzip2")
IF_FEATURE_SEAMLESS_XZ("/xz")
" magic");
xlseek(fd, offset, SEEK_CUR);
return;
found_magic:
# if !BB_MMU
/* NOMMU version of open_transformer execs
* an external unzipper that wants
* file position at the start of the file */
xlseek(fd, offset, SEEK_CUR);
# endif
open_transformer(fd, xformer, xformer_prog);
}
#endif /* ZIPPED */
int FAST_FUNC open_zipped(const char *fname)
{
#if !ZIPPED
return open(fname, O_RDONLY);
#else
char *sfx;
int fd;
fd = open(fname, O_RDONLY);
if (fd < 0)
return fd;
sfx = strrchr(fname, '.');
if (sfx) {
sfx++;
if (ENABLE_FEATURE_SEAMLESS_LZMA && strcmp(sfx, "lzma") == 0)
/* .lzma has no header/signature, just trust it */
open_transformer(fd, unpack_lzma_stream, "unlzma");
else
if ((ENABLE_FEATURE_SEAMLESS_GZ && strcmp(sfx, "gz") == 0)
|| (ENABLE_FEATURE_SEAMLESS_BZ2 && strcmp(sfx, "bz2") == 0)
|| (ENABLE_FEATURE_SEAMLESS_XZ && strcmp(sfx, "xz") == 0)
) {
setup_unzip_on_fd(fd /*, fail_if_not_detected: 1*/);
}
}
return fd;
#endif
}
void* FAST_FUNC xmalloc_open_zipped_read_close(const char *fname, size_t *maxsz_p)
{
int fd;
char *image;
fd = open_zipped(fname);
if (fd < 0)
return NULL;
image = xmalloc_read(fd, maxsz_p);
if (!image)
bb_perror_msg("read error from '%s'", fname);
close(fd);
return image;
}

View File

@ -30,16 +30,6 @@ echo ".pl \n(nlu+10"
*/ */
#if ENABLE_FEATURE_SEAMLESS_LZMA
#define Z_SUFFIX ".lzma"
#elif ENABLE_FEATURE_SEAMLESS_BZ2
#define Z_SUFFIX ".bz2"
#elif ENABLE_FEATURE_SEAMLESS_GZ
#define Z_SUFFIX ".gz"
#else
#define Z_SUFFIX ""
#endif
static int show_manpage(const char *pager, char *man_filename, int man, int level); static int show_manpage(const char *pager, char *man_filename, int man, int level);
static int run_pipe(const char *pager, char *man_filename, int man, int level) static int run_pipe(const char *pager, char *man_filename, int man, int level)
@ -102,7 +92,7 @@ static int run_pipe(const char *pager, char *man_filename, int man, int level)
/* Links do not have .gz extensions, even if manpage /* Links do not have .gz extensions, even if manpage
* is compressed */ * is compressed */
man_filename = xasprintf("%s/%s" Z_SUFFIX, man_filename, linkname); man_filename = xasprintf("%s/%s", man_filename, linkname);
free(line); free(line);
/* Note: we leak "new" man_filename string as well... */ /* Note: we leak "new" man_filename string as well... */
if (show_manpage(pager, man_filename, man, level + 1)) if (show_manpage(pager, man_filename, man, level + 1))
@ -124,32 +114,37 @@ static int run_pipe(const char *pager, char *man_filename, int man, int level)
return 1; return 1;
} }
/* man_filename is of the form "/dir/dir/dir/name.s" Z_SUFFIX */ /* man_filename is of the form "/dir/dir/dir/name.s" */
static int show_manpage(const char *pager, char *man_filename, int man, int level) static int show_manpage(const char *pager, char *man_filename, int man, int level)
{ {
#if SEAMLESS_COMPRESSION
/* We leak this allocation... */
char *filename_with_zext = xasprintf("%s.lzma", man_filename);
char *ext = strrchr(filename_with_zext, '.') + 1;
#endif
#if ENABLE_FEATURE_SEAMLESS_LZMA #if ENABLE_FEATURE_SEAMLESS_LZMA
if (run_pipe(pager, filename_with_zext, man, level))
return 1;
#endif
#if ENABLE_FEATURE_SEAMLESS_XZ
strcpy(ext, "xz");
if (run_pipe(pager, man_filename, man, level)) if (run_pipe(pager, man_filename, man, level))
return 1; return 1;
#endif #endif
#if ENABLE_FEATURE_SEAMLESS_BZ2 #if ENABLE_FEATURE_SEAMLESS_BZ2
#if ENABLE_FEATURE_SEAMLESS_LZMA strcpy(ext, "bz2");
strcpy(strrchr(man_filename, '.') + 1, "bz2");
#endif
if (run_pipe(pager, man_filename, man, level)) if (run_pipe(pager, man_filename, man, level))
return 1; return 1;
#endif #endif
#if ENABLE_FEATURE_SEAMLESS_GZ #if ENABLE_FEATURE_SEAMLESS_GZ
#if ENABLE_FEATURE_SEAMLESS_LZMA || ENABLE_FEATURE_SEAMLESS_BZ2 strcpy(ext, "gz");
strcpy(strrchr(man_filename, '.') + 1, "gz");
#endif
if (run_pipe(pager, man_filename, man, level)) if (run_pipe(pager, man_filename, man, level))
return 1; return 1;
#endif #endif
#if ENABLE_FEATURE_SEAMLESS_LZMA || ENABLE_FEATURE_SEAMLESS_BZ2 || ENABLE_FEATURE_SEAMLESS_GZ #if SEAMLESS_COMPRESSION
*strrchr(man_filename, '.') = '\0'; ext[-1] = '\0';
#endif #endif
if (run_pipe(pager, man_filename, man, level)) if (run_pipe(pager, man_filename, man, level))
return 1; return 1;
@ -262,7 +257,7 @@ int man_main(int argc UNUSED_PARAM, char **argv)
/* Search for cat, then man page */ /* Search for cat, then man page */
while (cat0man1 < 2) { while (cat0man1 < 2) {
int found_here; int found_here;
man_filename = xasprintf("%s/%s%.*s/%s.%.*s" Z_SUFFIX, man_filename = xasprintf("%s/%s%.*s/%s.%.*s",
cur_path, cur_path,
"cat\0man" + (cat0man1 * 4), "cat\0man" + (cat0man1 * 4),
sect_len, cur_sect, sect_len, cur_sect,