Optimize internet checksum to use 16-bit fetches.

We could use 32-bit fetches with the same technique on 64-bit
architectures, or SIMD could be used to do very fast 128-bit
fetches, but this isn't a performance bottleneck and this
method is very simple and relatively fast.
This commit is contained in:
Nicholas J. Kain 2020-10-24 17:14:20 -04:00
parent 5fdf3bd83e
commit d07469a5fa

View File

@ -1,6 +1,8 @@
#ifndef NCMLIB_NET_CHECKSUM_H
#define NCMLIB_NET_CHECKSUM_H
// RFC 1071 is still a good reference.
#include <stdint.h>
// When summing ones-complement 16-bit values using a 32-bit unsigned
@ -14,30 +16,28 @@ static inline uint16_t net_checksum161c_foldcarry(uint32_t v)
return v;
}
// This function is not suitable for summing buffers that are greater than
// 128k bytes in length: failure case will be incorrect checksums via
// unsigned overflow, which is a defined operation and is safe. This limit
// should not be an issue for IPv4 or IPv6 packets, which are limited to
// at most 64k bytes.
// Produces the correct result on little endian in the sense that
// the binary value returned, when stored to memory, will match
// the result on big endian; if the numeric value returned
// must match big endian results, then call ntohs() on the result.
// Computes the 16-bit ones-complement checksum over the `size` bytes at
// `buf` and returns the bitwise complement of the carry-folded sum.
//
// NOTE(review): this listing comes from a commit view whose +/- markers
// were lost, so the body below appears to interleave the removed byte-wise
// implementation with the added memcpy-based one (note the duplicate
// declarations of `sum` and `b` and the two return statements). It does
// not compile as shown -- confirm the final form against the repository.
static uint16_t net_checksum161c(const void *buf, size_t size)
{
// --- Byte-wise variant (presumably the lines removed by this commit) ---
uint32_t sum = 0;
// Remember whether a single trailing byte remains after the full words.
int odd = size & 0x01;
size_t i;
// Clear the low bit, then halve: `size` now counts whole 16-bit words.
size &= ~((size_t)0x01);
size >>= 1;
const uint8_t *b = (const uint8_t *)buf;
for (i = 0; i < size; ++i) {
// Assemble each word from two individual bytes (first byte in the high
// half), then pass it through ntohs() before accumulating.
uint16_t hi = b[i*2];
uint16_t lo = b[i*2+1];
sum += ntohs((lo + (hi << 8)));
// --- memcpy-based variant (presumably the lines added by this commit) ---
const char *b = (const char *)buf;
// One past the last byte of the buffer.
const char *bend = b + size;
// `sum` accumulates the full 16-bit words; `sumo` holds the contribution
// of the odd trailing byte, if there is one.
uint32_t sum = 0, sumo = 0;
if (size & 1) {
// Step back so that [b, bend) covers an even number of bytes.
--bend;
// Pad the final byte with a zero byte to form one 16-bit word. The cast
// to uint8_t converts the plain `char` value to an unsigned byte.
uint8_t z[2] = { (uint8_t)*bend, 0 };
uint16_t t;
memcpy(&t, z, 2);
sumo = t;
}
// Trailing-byte handling of the byte-wise variant: the last byte becomes
// the high half of a word whose low half is zero.
if (odd) {
uint16_t hi = b[i*2];
uint16_t lo = 0;
sum += ntohs((lo + (hi << 8)));
// Main loop of the memcpy-based variant: fetch two bytes at a time into a
// uint16_t with memcpy (rather than through a casted pointer) and add the
// value as stored in memory, with no byte swapping.
for (; b != bend; b += 2) {
uint16_t t;
memcpy(&t, b, 2);
sum += t;
}
// Two return statements appear here; the first uses `sum` alone and the
// second also adds the odd-byte term `sumo`. Both fold the carries of the
// 32-bit accumulator into 16 bits and return the complement.
return ~net_checksum161c_foldcarry(sum);
return ~net_checksum161c_foldcarry(sum + sumo);
}
// For two sequences of bytes A and B that return checksums CS(A) and CS(B),