memcmp is memcpy is xor is...
Date: 03/14/05
(Algorithms) Keywords: no keywords
This approach may be well known, but it's new and neat to me. The fundamental idea is picking the appropriate rules for reducing two bits into one bit. Repeating that step leads to implementations for copying, xoring, comparing, and others.
"Appropriate rules" is one of the sixteen combinations of two two-state values. The code below uses the numeric encoding defined here: here (details).
Actually implementing memcpy this way is senseless. I've run some tests, and performance is comparable for small chunks of small blocks. But as either grow the stock impl runs away really fast.
Apologies for the grisly code. I have a much easier to read version but it's a handful of lines longer. All original, licensed under the GNU GPL.
quick sample:
void* oft_xor(void *dst, const void *src1, const void *src2, size_t len)
{
return oft_emit(dst, src1, src2, len, op_xor);
}
void* oft_memcpy(void *dst, const void *src, size_t len)
{
return oft_emit(dst, src, NULL, len, op_left);
}
#include
/* All original, licensed under the GNU GPL. seth.schroeder@gmail.com. Part of the oft (one from two) library. */
#ifdef __cplusplus
#define boolean bool
#else
typedef int boolean;
#ifndef TRUE_FALSE_DEFINED
#define true 1
#define false 0
#endif
#endif
void* oft_xor(void *dst, const void *src1, const void *src2, size_t len);
void* oft_memcpy(void *dst, const void *src, size_t len);
void* oft_fill(void *dst, size_t len);
boolean oft_memeq(const void *b1, const void *b2, size_t len);
boolean oft_memneq(const void *b1, const void *b2, size_t len);
boolean oft_mem_oppose(const void *b1, const void *b2, size_t len);
static const int op_left = 10;
static const int op_always = 15;
static const int op_xor = 6;
static const int op_same = 9;
#define idx(a, b) \
((a) + 2 * (b))
#define ival(op, expr) \
(op & 1 << (expr))
#define bval(op, expr) \
(((op) & 1 << (expr)) != false)
#define advance(ptr) \
if (ptr) ptr++
#define testval(ptr, val) \
((!ptr) ? false : ((*(const unsigned char*)ptr & val) != false))
void* oft_emit(void *dst, const void *s1, const void *s2, size_t bytes, int op)
{
unsigned char *d = (unsigned char*)dst;
const unsigned char *l = (const unsigned char *)s1;
const unsigned char *r = (const unsigned char *)s2;
while (bytes--)
{
if (ival(op, idx(testval(l, 0x01), testval(r, 0x01))))
*d |= 0x01; else *d &= 0xFE;
if (ival(op, idx(testval(l, 0x02), testval(r, 0x02))))
*d |= 0x02; else *d &= 0xFD;
if (ival(op, idx(testval(l, 0x04), testval(r, 0x04))))
*d |= 0x04; else *d &= 0xFB;
if (ival(op, idx(testval(l, 0x08), testval(r, 0x08))))
*d |= 0x08; else *d &= 0xF7;
if (ival(op, idx(testval(l, 0x10), testval(r, 0x10))))
*d |= 0x10; else *d &= 0xEF;
if (ival(op, idx(testval(l, 0x20), testval(r, 0x20))))
*d |= 0x20; else *d &= 0xDF;
if (ival(op, idx(testval(l, 0x40), testval(r, 0x40))))
*d |= 0x40; else *d &= 0xBF;
if (ival(op, idx(testval(l, 0x80), testval(r, 0x80))))
*d |= 0x80; else *d &= 0x7F;
advance(d);
advance(l);
advance(r);
}
return d;
}
boolean oft_match(const void *s1, const void *s2, size_t n, int op,
boolean pat)
{
const unsigned char *l = (const unsigned char *)s1;
const unsigned char *r = (const unsigned char *)s2;
while(n--)
{
if (bval(op, idx(testval(l, 0x01), testval(r, 0x01))) == pat ||
bval(op, idx(testval(l, 0x02), testval(r, 0x02))) == pat ||
bval(op, idx(testval(l, 0x04), testval(r, 0x04))) == pat ||
bval(op, idx(testval(l, 0x08), testval(r, 0x08))) == pat ||
bval(op, idx(testval(l, 0x10), testval(r, 0x10))) == pat ||
bval(op, idx(testval(l, 0x20), testval(r, 0x20))) == pat ||
bval(op, idx(testval(l, 0x40), testval(r, 0x40))) == pat ||
bval(op, idx(testval(l, 0x80), testval(r, 0x80))) == pat)
return true;
advance(l);
advance(r);
}
return false;
}
boolean oft_match_any(const void *l, const void *r, size_t n, int op)
{
return oft_match(l, r, n, op, true);
}
boolean oft_match_all(const void *l, const void *r, size_t n, int op)
{
return !oft_match(l, r, n, op, false);
}
void* oft_xor(void *dst, const void *src1, const void *src2, size_t len)
{
return oft_emit(dst, src1, src2, len, op_xor);
}
void* oft_memcpy(void *dst, const void *src, size_t len)
{
return oft_emit(dst, src, NULL, len, op_left);
}
void* oft_fill(void *dst, size_t len)
{
return oft_emit(dst, NULL, NULL, len, op_always);
}
boolean oft_memeq(const void *b1, const void *b2, size_t len)
{
return oft_match_all(b1, b2, len, op_same);
}
boolean oft_memneq(const void *b1, const void *b2, size_t len)
{
return oft_match_any(b1, b2, len, op_xor);
}
boolean oft_mem_oppose(const void *b1, const void *b2, size_t len)
{
return oft_match_all(b1, b2, len, op_xor);
}
the second and third columns are in microseconds up to one second, then in seconds afterwards. Run on a 1x1.8 GHz G5.
# memcpy oft 1 byte
1 0 1
2 0 0
4 0 1
8 0 1
16 1 1
32 0 4
64 1 7
128 3 13
256 5 27
512 10 53
# memcpy oft 2 bytes
1 0 1
2 0 0
4 0 1
8 0 2
16 0 4
32 0 7
64 1 12
128 2 25
256 4 49
512 8 94
# memcpy oft 4 bytes
1 0 0
2 0 1
4 0 2
8 0 3
16 0 6
32 1 12
64 1 23
128 2 48
256 4 95
512 8 192
# memcpy oft 8 bytes
1 1 0
2 0 1
4 0 3
8 0 6
16 0 12
32 1 24
64 1 86
128 2 96
256 4 192
512 8 382
# memcpy oft 16 bytes
1 0 2
2 0 3
4 0 6
8 0 12
16 0 24
32 1 48
64 1 94
128 2 190
256 4 379
512 8 812
# memcpy oft 32 bytes
1 0 3
2 0 6
4 0 12
8 0 24
16 0 48
32 1 95
64 1 190
128 2 378
256 5 754
512 9 1512
# memcpy oft 64 bytes
1 0 6
2 0 12
4 0 24
8 0 48
16 1 94
32 1 194
64 1 379
128 3 760
256 7 2465
512 13 3101
# memcpy oft 128 bytes
1 2 12
2 1 23
4 0 47
8 0 95
16 0 190
32 1 378
64 1 756
128 3 1512
256 6 3045
512 11 6249
# memcpy oft 256 bytes
1 2 23
2 0 48
4 0 95
8 0 187
16 0 377
32 0 761
64 2 1534
128 3 3031
256 6 6130
512 15 15804
# memcpy oft 512 bytes
1 3 47
2 1 94
4 0 187
8 0 375
16 1 1596
32 3 1510
64 2 3178
128 4 6196
256 10 12339
512 19 25461
# memcpy oft 1024 bytes
1 3 95
2 0 190
4 0 377
8 0 753
16 1 1504
32 2 3116
64 4 6209
128 10 12258
256 18 26003
512 34 49812
# memcpy oft 2048 bytes
1 3 189
2 0 381
4 1 871
8 3 1500
16 2 3121
32 4 9837
64 11 13289
128 17 25521
256 33 50316
512 63 100882
# memcpy oft 4096 bytes
1 5 373
2 1 751
4 1 1511
8 2 3143
16 6 6072
32 8 12931
64 18 24895
128 34 50472
256 64 100426
512 124 205406
# memcpy oft 8192 bytes
1 57 750
2 1 1513
4 2 3178
8 7 6117
16 8 12251
32 18 25397
64 33 50933
128 64 101312
256 126 201690
512 247 407443
# memcpy oft 16384 bytes
1 75 1506
2 2 3032
4 4 6869
8 11 13132
16 19 24837
32 33 50750
64 65 104674
128 124 202902
256 248 409276
512 493 817020
# memcpy oft 32768 bytes
1 118 3090
2 6 6186
4 15 12297
8 22 25320
16 37 51467
32 67 101540
64 126 202058
128 349 412381
256 522 821465
512 979 1.638
Source: http://www.livejournal.com/community/algorithms/49994.html