[base] base64 encoder
Really slow implementation. Speed up on a rainy day.
This commit is contained in:
parent
83f1e4bdd9
commit
a29b5c8d07
@ -35,6 +35,7 @@ PROGRAMS=\
|
||||
all: $(PROGRAMS)
|
||||
|
||||
SOURCE=\
|
||||
base/base64.cc \
|
||||
base/error_state.cc \
|
||||
\
|
||||
caching/hint_array.cc \
|
||||
@ -228,6 +229,7 @@ thin_metadata_size: thin-provisioning/thin_metadata_size.o
|
||||
# Cache tools
|
||||
|
||||
CACHE_CHECK_SOURCE=\
|
||||
base/base64.cc \
|
||||
base/error_state.cc \
|
||||
persistent-data/checksum.cc \
|
||||
persistent-data/endian_utils.cc \
|
||||
|
186
base/base64.cc
Normal file
186
base/base64.cc
Normal file
@ -0,0 +1,186 @@
|
||||
#include "base/base64.h"
|
||||
|
||||
#include <boost/optional.hpp>
|
||||
#include <sstream>
|
||||
#include <stdexcept>
|
||||
|
||||
using namespace base;
|
||||
using namespace boost;
|
||||
using namespace std;
|
||||
|
||||
//----------------------------------------------------------------
|
||||
|
||||
namespace {
|
||||
// The encoding alphabet: the character at offset i represents the
// 6-bit value i.
char const *table_ =
	"ABCDEFGHIJKLMNOPQRSTUVWXYZ"
	"abcdefghijklmnopqrstuvwxyz"
	"0123456789+/";
|
||||
|
||||
// The alphabet offsets produced from one group of up to three
// input bytes.
struct index_set {
	// Number of leading entries of index_ that carry data.
	unsigned nr_valid_;

	// Offsets into the encoding alphabet, most significant input
	// bits first.
	unsigned index_[4];
};
|
||||
|
||||
// Splits a single trailing input byte into two alphabet offsets.
//
// The top six bits of c form the first offset; the bottom two bits
// become the high bits of the second offset, whose low four bits
// are zero.
index_set split1(unsigned char c) {
	// Value-initialise so the two unused slots hold zero instead of
	// indeterminate values when the struct is copied to the caller.
	index_set r = index_set();

	r.nr_valid_ = 2;
	r.index_[0] = c >> 2;
	r.index_[1] = (c & 3) << 4;

	return r;
}
|
||||
|
||||
// Splits two trailing input bytes into three alphabet offsets.
//
// The sixteen input bits are consumed six at a time, most
// significant first; the final offset carries the last four bits
// of c2 in its high positions with its low two bits zero.
index_set split2(unsigned char c1, unsigned char c2) {
	// Value-initialise so the unused fourth slot holds zero instead
	// of an indeterminate value when the struct is copied.
	index_set r = index_set();

	r.nr_valid_ = 3;
	r.index_[0] = c1 >> 2;
	r.index_[1] = ((c1 & 3) << 4) | (c2 >> 4);
	r.index_[2] = (c2 & 15) << 2;

	return r;
}
|
||||
|
||||
// Splits a full group of three input bytes into four alphabet
// offsets, consuming the 24 input bits six at a time, most
// significant first.
//
// c3 is an unsigned char like its siblings: with a wider type a
// value above 255 would make (c3 >> 6) spill past two bits and
// yield an offset outside the 64-entry alphabet.
index_set split3(unsigned char c1, unsigned char c2, unsigned char c3) {
	index_set r = index_set();

	r.nr_valid_ = 4;
	r.index_[0] = c1 >> 2;
	r.index_[1] = ((c1 & 3) << 4) | (c2 >> 4);
	r.index_[2] = ((c2 & 15) << 2) | (c3 >> 6);
	r.index_[3] = c3 & 63;

	return r;
}
|
||||
|
||||
// Produces the alphabet offsets for the group of up to three bytes
// that starts at raw[index].
//
// Throws std::runtime_error if index does not refer to an element
// of raw.
index_set split(vector<unsigned char> const &raw, vector<unsigned char>::size_type index) {
	// Check the bound before subtracting: size() - index would wrap
	// round to a huge value if index were past the end.
	if (index >= raw.size())
		throw std::runtime_error("internal error, in split");

	// Compared at full width; narrowing the difference to unsigned
	// first could turn a large remainder into a small one.
	vector<unsigned char>::size_type const remaining = raw.size() - index;

	if (remaining == 1)
		return split1(raw.at(index));

	if (remaining == 2)
		return split2(raw.at(index), raw.at(index + 1));

	return split3(raw.at(index), raw.at(index + 1), raw.at(index + 2));
}
|
||||
|
||||
// Maps an encoded character back to its 6-bit value, or returns an
// empty optional if the character is not in the alphabet ('=' is
// not in the alphabet and is handled by the caller).
//
// The value is computed from the character's range rather than by
// scanning the alphabet.  This relies on 'A'-'Z', 'a'-'z' and
// '0'-'9' each being contiguous in the execution character set,
// which holds for ASCII-compatible encodings.
optional<unsigned> char_to_index(char c) {
	if (c >= 'A' && c <= 'Z')
		return optional<unsigned>(static_cast<unsigned>(c - 'A'));

	if (c >= 'a' && c <= 'z')
		return optional<unsigned>(26u + static_cast<unsigned>(c - 'a'));

	if (c >= '0' && c <= '9')
		return optional<unsigned>(52u + static_cast<unsigned>(c - '0'));

	if (c == '+')
		return optional<unsigned>(62u);

	if (c == '/')
		return optional<unsigned>(63u);

	return optional<unsigned>();
}
|
||||
|
||||
// Wraps decoded bytes in the result variant.
decoded_or_error success(vector<unsigned char> const &decoded) {
	decoded_or_error result(decoded);
	return result;
}
|
||||
|
||||
// Wraps an error message in the result variant.
decoded_or_error fail(string msg) {
	decoded_or_error result(msg);
	return result;
}
|
||||
|
||||
// Builds the error result that reports c as an unacceptable input
// character; c is quoted verbatim in the message.
decoded_or_error fail_char(char c) {
	string text("bad input character: '");
	text.push_back(c);
	text += "'";

	return fail(text);
}
|
||||
|
||||
// Decodes one group of four encoded characters into one, two or
// three bytes.
//
// c1 and c2 must be alphabet characters.  c3 and c4 may each be an
// alphabet character or the '=' pad, but a pad in c3 must be
// followed by a pad in c4.  Returns the decoded bytes, or an error
// naming the first offending character.
decoded_or_error decode_quad(char c1, char c2, char c3, char c4) {
	typedef optional<unsigned> oi;
	vector<unsigned char> decoded;

	oi i1 = char_to_index(c1);
	if (!i1)
		return fail_char(c1);

	oi i2 = char_to_index(c2);
	if (!i2)
		return fail_char(c2);

	// First byte: all six bits of i1 plus the top two of i2.
	unsigned char const d1 = (*i1 << 2) | (*i2 >> 4);
	decoded.push_back(d1);

	if (c3 == '=') {
		// "xx==" encodes a single byte.  The low four bits of i2
		// are filler from the encoder (split1 leaves them zero),
		// not data, so no second byte is emitted.  Anything other
		// than a second pad here is malformed.
		if (c4 != '=')
			return fail_char(c4);

		return success(decoded);
	}

	oi i3 = char_to_index(c3);
	if (!i3)
		return fail_char(c3);

	// Second byte: low four bits of i2 plus the top four of i3.
	unsigned char const d2 = ((*i2 & 15) << 4) | (*i3 >> 2);
	decoded.push_back(d2);

	if (c4 == '=') {
		// "xxx=" encodes two bytes.  The low two bits of i3 are
		// filler (split2 leaves them zero), so no third byte is
		// emitted.
		return success(decoded);
	}

	oi i4 = char_to_index(c4);
	if (!i4)
		return fail_char(c4);

	// Third byte: low two bits of i3 plus all six of i4.
	unsigned char const d3 = ((*i3 & 3) << 6) | *i4;
	decoded.push_back(d3);

	return success(decoded);
}
|
||||
}
|
||||
|
||||
//----------------------------------------------------------------
|
||||
|
||||
// Encodes raw as base64 text.
//
// Every group of three input bytes becomes four alphabet
// characters; a final group of one or two bytes is padded with '='
// so the result length is always a multiple of four.  An empty
// input yields an empty string.
//
// Throws std::length_error if raw is too large for the unsigned
// loop counter to step across without wrapping.
string
base::base64_encode(vector<unsigned char> const &raw)
{
	// i advances in steps of three; make sure i + 3 cannot wrap.
	if (raw.size() > static_cast<unsigned>(-1) - 3u)
		throw std::length_error("input too large for base64_encode");

	string r;

	// Four output characters per (possibly partial) group of three
	// input bytes; reserving up front avoids repeated regrowth.
	r.reserve((raw.size() + 2) / 3 * 4);

	for (unsigned i = 0; i < raw.size(); i += 3) {
		unsigned j;
		index_set is = split(raw, i);

		for (j = 0; j < is.nr_valid_; j++)
			r.push_back(table_[is.index_[j]]);

		// Pad a short final group out to four characters.
		for (; j < 4; j++)
			r.push_back('=');
	}

	return r;
}
|
||||
|
||||
// Decodes base64 text.
//
// Returns the decoded bytes, or an error string if the length is
// not a multiple of four, if a character is outside the alphabet,
// or if '=' padding appears anywhere but the end of the input.
base::decoded_or_error
base::base64_decode(string const &encoded)
{
	string::size_type const len = encoded.length();

	if (len % 4)
		return decoded_or_error("bad input length");

	vector<unsigned char> decoded;

	// At most three bytes per four input characters.
	decoded.reserve(len / 4 * 3);

	// The index has the string's own size type so it cannot wrap
	// before reaching len.
	for (string::size_type i = 0; i < len; i += 4) {
		decoded_or_error doe = decode_quad(encoded[i], encoded[i + 1], encoded[i + 2], encoded[i + 3]);

		vector<unsigned char> *v = get<vector<unsigned char> >(&doe);
		if (!v)
			return doe;

		// A quad yields fewer than three bytes only when it was
		// padded; padding is only legal in the final quad.
		if (v->size() < 3 && i + 4 < len)
			return fail_char('=');

		decoded.insert(decoded.end(), v->begin(), v->end());
	}

	return decoded_or_error(decoded);
}
|
||||
|
||||
//----------------------------------------------------------------
|
20
base/base64.h
Normal file
20
base/base64.h
Normal file
@ -0,0 +1,20 @@
|
||||
#ifndef BASE_BASE64_H
|
||||
#define BASE_BASE64_H
|
||||
|
||||
#include <boost/variant.hpp>
|
||||
#include <string>
|
||||
#include <vector>
|
||||
|
||||
//----------------------------------------------------------------
|
||||
|
||||
namespace base {
|
||||
std::string base64_encode(std::vector<unsigned char> const &raw);
|
||||
|
||||
// Returns either the decoded data or an error string
|
||||
typedef boost::variant<std::vector<unsigned char>, std::string> decoded_or_error;
|
||||
decoded_or_error base64_decode(std::string const &encoded);
|
||||
}
|
||||
|
||||
//----------------------------------------------------------------
|
||||
|
||||
#endif
|
@ -47,6 +47,7 @@ TEST_SOURCE=\
|
||||
\
|
||||
unit-tests/array_block_t.cc \
|
||||
unit-tests/array_t.cc \
|
||||
unit-tests/base64_t.cc \
|
||||
unit-tests/bitset_t.cc \
|
||||
unit-tests/block_t.cc \
|
||||
unit-tests/btree_t.cc \
|
||||
|
121
unit-tests/base64_t.cc
Normal file
121
unit-tests/base64_t.cc
Normal file
@ -0,0 +1,121 @@
|
||||
#include "gmock/gmock.h"
|
||||
#include "base/base64.h"
|
||||
|
||||
#include <stdexcept>
|
||||
#include <stdlib.h>
|
||||
|
||||
using namespace base;
|
||||
using namespace boost;
|
||||
using namespace std;
|
||||
using namespace testing;
|
||||
|
||||
//----------------------------------------------------------------
|
||||
|
||||
namespace {
|
||||
typedef vector<unsigned char> bytes;
|
||||
|
||||
char const *wikipedia_examples[] = {
|
||||
"any carnal pleasure.", "YW55IGNhcm5hbCBwbGVhc3VyZS4=",
|
||||
"any carnal pleasure", "YW55IGNhcm5hbCBwbGVhc3VyZQ==",
|
||||
"any carnal pleasur", "YW55IGNhcm5hbCBwbGVhc3Vy",
|
||||
"any carnal pleasu", "YW55IGNhcm5hbCBwbGVhc3U=",
|
||||
"any carnal pleas", "YW55IGNhcm5hbCBwbGVhcw==",
|
||||
"pleasure.", "cGxlYXN1cmUu",
|
||||
"leasure.", "bGVhc3VyZS4=",
|
||||
"easure.", "ZWFzdXJlLg==",
|
||||
"asure.", "YXN1cmUu",
|
||||
"sure.", "c3VyZS4="
|
||||
};
|
||||
|
||||
void assert_fails(decoded_or_error const &eoe, string const &msg) {
|
||||
ASSERT_THAT(get<string>(eoe), Eq(msg));
|
||||
}
|
||||
};
|
||||
|
||||
//----------------------------------------------------------------
|
||||
|
||||
// Encoding zero bytes yields an empty string.
TEST(Base64Tests, encoding_an_empty_string)
{
	bytes const nothing;
	string const encoded = base64_encode(nothing);

	ASSERT_THAT(encoded, Eq(string()));
}
|
||||
|
||||
// Decoding an empty string succeeds and yields zero bytes.
TEST(Base64Tests, decoding_an_empty_string)
{
	decoded_or_error const doe = base64_decode("");

	ASSERT_THAT(get<vector<unsigned char> >(doe), Eq(bytes()));
}
|
||||
|
||||
// One zero byte encodes to two data characters plus two pads.
TEST(Base64Tests, encode_single_byte)
{
	bytes input;
	input.push_back(0);

	string const expected("AA==");
	ASSERT_THAT(base64_encode(input), Eq(expected));
}
|
||||
|
||||
// Two zero bytes encode to three data characters plus one pad.
TEST(Base64Tests, encode_double_byte)
{
	bytes input;
	input.assign(2, 0);

	string const expected("AAA=");
	ASSERT_THAT(base64_encode(input), Eq(expected));
}
|
||||
|
||||
// Three zero bytes encode to four data characters and no padding.
TEST(Base64Tests, encode_triple_byte)
{
	bytes input;
	input.assign(3, 0);

	string const expected("AAAA");
	ASSERT_THAT(base64_encode(input), Eq(expected));
}
|
||||
|
||||
// Every (input, expected) pair in wikipedia_examples must encode
// to its expected text.
TEST(Base64Tests, longer_encodings)
{
	// Derive the pair count from the table so that entries added
	// or already present are never silently skipped.
	unsigned const nr_examples =
		sizeof(wikipedia_examples) / sizeof(wikipedia_examples[0]) / 2;

	for (unsigned example = 0; example < nr_examples; example++) {
		string const in(wikipedia_examples[example * 2]);
		string const out(wikipedia_examples[example * 2 + 1]);
		bytes const bs(in.begin(), in.end());

		ASSERT_THAT(base64_encode(bs), Eq(out));
	}
}
|
||||
|
||||
// Inputs whose length is not a multiple of four are rejected with
// the length error.
TEST(Base64Tests, decoding_fails_with_bad_size_input)
{
	string const expected("bad input length");
	char const *inputs[] = {"AAA", "AA", "A"};

	for (unsigned i = 0; i < 3; i++)
		assert_fails(base64_decode(inputs[i]), expected);
}
|
||||
|
||||
// Encoding then decoding each example input must give back the
// original bytes.
TEST(Base64Tests, encode_decode_cycle)
{
	// Derive the pair count from the table so that no example is
	// silently skipped.
	unsigned const nr_examples =
		sizeof(wikipedia_examples) / sizeof(wikipedia_examples[0]) / 2;

	for (unsigned example = 0; example < nr_examples; example++) {
		string const in(wikipedia_examples[example * 2]);
		bytes const bs(in.begin(), in.end());

		decoded_or_error doe = base64_decode(base64_encode(bs));
		ASSERT_THAT(get<vector<unsigned char> >(doe), Eq(bs));
	}
}
|
||||
|
||||
// Round-trips 10000 pseudo-random buffers for each length from 1
// to 16 bytes through encode and decode.
TEST(Base64Tests, random_data)
{
	for (unsigned len = 1; len <= 16; len++) {
		for (unsigned trial = 0; trial != 10000; trial++) {
			bytes raw;

			for (unsigned filled = 0; filled < len; filled++)
				raw.push_back(::rand() % 256);

			decoded_or_error const round_trip = base64_decode(base64_encode(raw));
			ASSERT_THAT(get<vector<unsigned char> >(round_trip), Eq(raw));
		}
	}
}
|
||||
|
||||
//----------------------------------------------------------------
|
Loading…
Reference in New Issue
Block a user