memcmp is memcpy is xor is...

    Date: 03/14/05 (Algorithms)    Keywords: no keywords

    This approach may be well known, but it's new and neat to me. The fundamental idea is picking the appropriate rules for reducing two bits into one bit. Repeating that step leads to implementations for copying, xoring, comparing, and others.

    "Appropriate rules" is one of the sixteen combinations of two two-state values. The code below uses the numeric encoding defined here: here (details).

    Actually implementing memcpy this way is senseless. I've run some tests, and performance is comparable for small chunks of small blocks. But as either grow the stock impl runs away really fast.

    Apologies for the grisly code. I have a much easier to read version but it's a handful of lines longer. All original, licensed under the GNU GPL.

    quick sample:

    void* oft_xor(void *dst, const void *src1, const void *src2, size_t len)
    {
            return oft_emit(dst, src1, src2, len, op_xor);
    }
    void* oft_memcpy(void *dst, const void *src, size_t len)
    {
            return oft_emit(dst, src, NULL, len, op_left);
    }
    


    #include 
    
    /* All original, licensed under the GNU GPL. seth.schroeder@gmail.com. Part of the oft (one from two) library. */
    
    #ifdef __cplusplus
     #define boolean bool
    #else
     typedef int boolean;
     #ifndef TRUE_FALSE_DEFINED
      #define true 1
      #define false 0
     #endif
    #endif
    
    
    void* oft_xor(void *dst, const void *src1, const void *src2, size_t len);
    void* oft_memcpy(void *dst, const void *src, size_t len);
    void* oft_fill(void *dst, size_t len);
    
    
    boolean oft_memeq(const void *b1, const void *b2, size_t len);
    boolean oft_memneq(const void *b1, const void *b2, size_t len);
    boolean oft_mem_oppose(const void *b1, const void *b2, size_t len);
    
    
    static const int op_left = 10;
    static const int op_always = 15;
    static const int op_xor = 6;
    static const int op_same = 9;
    
    #define idx(a, b) \
            ((a) + 2 * (b))
    
    #define ival(op, expr) \
            (op & 1 << (expr))
    
    #define bval(op, expr) \
            (((op) & 1 << (expr)) != false)
    
    #define advance(ptr) \
            if (ptr) ptr++
    
    #define testval(ptr, val) \
            ((!ptr) ? false : ((*(const unsigned char*)ptr & val) != false))
    
    
    void* oft_emit(void *dst, const void *s1, const void *s2, size_t bytes, int op)
    {
            unsigned char *d = (unsigned char*)dst;
            const unsigned char *l = (const unsigned char *)s1;
            const unsigned char *r = (const unsigned char *)s2;
    
            while (bytes--)
            {
                    if (ival(op, idx(testval(l, 0x01), testval(r, 0x01))))
                            *d |= 0x01; else *d &= 0xFE;
                    if (ival(op, idx(testval(l, 0x02), testval(r, 0x02))))
                            *d |= 0x02; else *d &= 0xFD;
                    if (ival(op, idx(testval(l, 0x04), testval(r, 0x04))))
                            *d |= 0x04; else *d &= 0xFB;
                    if (ival(op, idx(testval(l, 0x08), testval(r, 0x08))))
                            *d |= 0x08; else *d &= 0xF7;
                    if (ival(op, idx(testval(l, 0x10), testval(r, 0x10))))
                            *d |= 0x10; else *d &= 0xEF;
                    if (ival(op, idx(testval(l, 0x20), testval(r, 0x20))))
                            *d |= 0x20; else *d &= 0xDF;
                    if (ival(op, idx(testval(l, 0x40), testval(r, 0x40))))
                            *d |= 0x40; else *d &= 0xBF;
                    if (ival(op, idx(testval(l, 0x80), testval(r, 0x80))))
                            *d |= 0x80; else *d &= 0x7F;
    
                    advance(d);
                    advance(l);
                    advance(r);
            }
    
            return d;
    }
    
    
    boolean oft_match(const void *s1, const void *s2, size_t n, int op,
                      boolean pat)
    {
            const unsigned char *l = (const unsigned char *)s1;
            const unsigned char *r = (const unsigned char *)s2;
    
            while(n--)
            {
                    if (bval(op, idx(testval(l, 0x01), testval(r, 0x01))) == pat ||
                        bval(op, idx(testval(l, 0x02), testval(r, 0x02))) == pat ||
                        bval(op, idx(testval(l, 0x04), testval(r, 0x04))) == pat ||
                        bval(op, idx(testval(l, 0x08), testval(r, 0x08))) == pat ||
                        bval(op, idx(testval(l, 0x10), testval(r, 0x10))) == pat ||
                        bval(op, idx(testval(l, 0x20), testval(r, 0x20))) == pat ||
                        bval(op, idx(testval(l, 0x40), testval(r, 0x40))) == pat ||
                        bval(op, idx(testval(l, 0x80), testval(r, 0x80))) == pat)
                            return true;
    
                    advance(l);
                    advance(r);
            }
            return false;
    }
    
    
    boolean oft_match_any(const void *l, const void *r, size_t n, int op)
    {
            return oft_match(l, r, n, op, true);
    }
    
    
    boolean oft_match_all(const void *l, const void *r, size_t n, int op)
    {
            return !oft_match(l, r, n, op, false);
    }
    
    
    void* oft_xor(void *dst, const void *src1, const void *src2, size_t len)
    {
            return oft_emit(dst, src1, src2, len, op_xor);
    }
    
    
    void* oft_memcpy(void *dst, const void *src, size_t len)
    {
            return oft_emit(dst, src, NULL, len, op_left);
    }
    
    
    void* oft_fill(void *dst, size_t len)
    {
            return oft_emit(dst, NULL, NULL, len, op_always);
    }
    
    
    boolean oft_memeq(const void *b1, const void *b2, size_t len)
    {
            return oft_match_all(b1, b2, len, op_same);
    }
    
    
    boolean oft_memneq(const void *b1, const void *b2, size_t len)
    {
            return oft_match_any(b1, b2, len, op_xor);
    }
    
    
    boolean oft_mem_oppose(const void *b1, const void *b2, size_t len)
    {
            return oft_match_all(b1, b2, len, op_xor);
    }
    
    
    



    the second and third columns are in microseconds up to one second, then in seconds afterwards. Run on a 1x1.8 GHz G5.
    #	memcpy	oft	1 byte
    1	0	1	
    2	0	0	
    4	0	1	
    8	0	1	
    16	1	1	
    32	0	4	
    64	1	7	
    128	3	13	
    256	5	27	
    512	10	53	
    #	memcpy	oft	2 bytes
    1	0	1	
    2	0	0	
    4	0	1	
    8	0	2	
    16	0	4	
    32	0	7	
    64	1	12	
    128	2	25	
    256	4	49	
    512	8	94	
    #	memcpy	oft	4 bytes
    1	0	0	
    2	0	1	
    4	0	2	
    8	0	3	
    16	0	6	
    32	1	12	
    64	1	23	
    128	2	48	
    256	4	95	
    512	8	192	
    #	memcpy	oft	8 bytes
    1	1	0	
    2	0	1	
    4	0	3	
    8	0	6	
    16	0	12	
    32	1	24	
    64	1	86	
    128	2	96	
    256	4	192	
    512	8	382	
    #	memcpy	oft	16 bytes
    1	0	2	
    2	0	3	
    4	0	6	
    8	0	12	
    16	0	24	
    32	1	48	
    64	1	94	
    128	2	190	
    256	4	379	
    512	8	812	
    #	memcpy	oft	32 bytes
    1	0	3	
    2	0	6	
    4	0	12	
    8	0	24	
    16	0	48	
    32	1	95	
    64	1	190	
    128	2	378	
    256	5	754	
    512	9	1512	
    #	memcpy	oft	64 bytes
    1	0	6	
    2	0	12	
    4	0	24	
    8	0	48	
    16	1	94	
    32	1	194	
    64	1	379	
    128	3	760	
    256	7	2465	
    512	13	3101	
    #	memcpy	oft	128 bytes
    1	2	12	
    2	1	23	
    4	0	47	
    8	0	95	
    16	0	190	
    32	1	378	
    64	1	756	
    128	3	1512	
    256	6	3045	
    512	11	6249	
    #	memcpy	oft	256 bytes
    1	2	23	
    2	0	48	
    4	0	95	
    8	0	187	
    16	0	377	
    32	0	761	
    64	2	1534	
    128	3	3031	
    256	6	6130	
    512	15	15804	
    #	memcpy	oft	512 bytes
    1	3	47	
    2	1	94	
    4	0	187	
    8	0	375	
    16	1	1596	
    32	3	1510	
    64	2	3178	
    128	4	6196	
    256	10	12339	
    512	19	25461	
    #	memcpy	oft	1024 bytes
    1	3	95	
    2	0	190	
    4	0	377	
    8	0	753	
    16	1	1504	
    32	2	3116	
    64	4	6209	
    128	10	12258	
    256	18	26003	
    512	34	49812	
    #	memcpy	oft	2048 bytes
    1	3	189	
    2	0	381	
    4	1	871	
    8	3	1500	
    16	2	3121	
    32	4	9837	
    64	11	13289	
    128	17	25521	
    256	33	50316	
    512	63	100882	
    #	memcpy	oft	4096 bytes
    1	5	373	
    2	1	751	
    4	1	1511	
    8	2	3143	
    16	6	6072	
    32	8	12931	
    64	18	24895	
    128	34	50472	
    256	64	100426	
    512	124	205406	
    #	memcpy	oft	8192 bytes
    1	57	750	
    2	1	1513	
    4	2	3178	
    8	7	6117	
    16	8	12251	
    32	18	25397	
    64	33	50933	
    128	64	101312	
    256	126	201690	
    512	247	407443	
    #	memcpy	oft	16384 bytes
    1	75	1506	
    2	2	3032	
    4	4	6869	
    8	11	13132	
    16	19	24837	
    32	33	50750	
    64	65	104674	
    128	124	202902	
    256	248	409276	
    512	493	817020	
    #	memcpy	oft	32768 bytes
    1	118	3090	
    2	6	6186	
    4	15	12297	
    8	22	25320	
    16	37	51467	
    32	67	101540	
    64	126	202058	
    128	349	412381	
    256	522	821465	
    512	979	1.638	
    

    Source: http://www.livejournal.com/community/algorithms/49994.html

« Microsoft Riddle again || Run time analysis of... »


antivirus | apache | asp | blogging | browser | bugtracking | cms | crm | css | database | ebay | ecommerce | google | hosting | html | java | jsp | linux | microsoft | mysql | offshore | offshoring | oscommerce | php | postgresql | programming | rss | security | seo | shopping | software | spam | spyware | sql | technology | templates | tracker | virus | web | xml | yahoo | home