From 7c577d95af21f6d5789ea0be7e79c433648d9222 Mon Sep 17 00:00:00 2001
From: rofl0r
Date: Sat, 4 Aug 2012 01:08:26 +0200
Subject: [PATCH] add stuff

---
Notes (text between "---" and the first "diff --git" is ignored by git am):
 - the header names after "#include" were lost in the copy this patch was
   rebuilt from; the restored names are a best guess and should be confirmed.
 - the blob ids on the "index" lines predate the edits made in review.

 build.sh          |  18 +++++
 compare.pl        |  33 ++++++++
 dummyfuncs.c      |   8 ++
 memcpy_2_i386.s   |  31 ++++++++
 memcpy_musl_093.c |  35 +++++++++
 memcpy_test.c     | 219 ++++++++++++++++++++++++++++++++++++++++++++++++++++++
 6 files changed, 344 insertions(+)
 create mode 100755 build.sh
 create mode 100755 compare.pl
 create mode 100644 dummyfuncs.c
 create mode 100644 memcpy_2_i386.s
 create mode 100644 memcpy_musl_093.c
 create mode 100644 memcpy_test.c

diff --git a/build.sh b/build.sh
new file mode 100755
index 0000000..e254784
--- /dev/null
+++ b/build.sh
@@ -0,0 +1,18 @@
+#!/bin/sh
+# Build the memcpy benchmark. $1 is the source file (.c or .s) that
+# defines mymemcpy; the resulting executable is ./test.
+if [ -z "$1" ] ; then
+	echo error, pass filename of file containing mymemcpy
+	exit 1
+fi
+
+# stop at the first failing step instead of linking stale objects
+set -e
+
+[ -z "$CC" ] && CC=gcc
+# dummyfuncs.c gets no -O flag and its own object file, so the compiler
+# cannot see through fillmem/dummy_access when building the benchmark.
+$CC -c dummyfuncs.c -o dummyfuncs.o
+$CC -D_GNU_SOURCE -D_BSD_SOURCE -O3 -finline-functions "-DFILENAME=\"$1\"" -c memcpy_test.c -o memcpy_test.o
+$CC -O3 -c "$1" -o mymemcpy.o
+$CC dummyfuncs.o memcpy_test.o mymemcpy.o -o test
diff --git a/compare.pl b/compare.pl
new file mode 100755
index 0000000..75db12f
--- /dev/null
+++ b/compare.pl
@@ -0,0 +1,33 @@
+#!/usr/bin/env perl
+# Print the results of two benchmark runs side by side.
+# usage: compare.pl RESULTS1 RESULTS2
+use strict;
+use warnings;
+use File::Slurp;
+
+die "usage: $0 file1 file2\n" unless @ARGV == 2;
+
+my @file1 = read_file($ARGV[0]) or die;
+my @file2 = read_file($ARGV[1]) or die;
+
+# split a "size<TAB>value" result line into its two fields
+sub getvals {
+	my $x = shift;
+	my @a = split /\t/, $x;
+	return $a[0], $a[1];
+}
+
+
+for(@file1) {
+	my $s = shift(@file2);
+	# the second file is shorter than the first: nothing left to pair
+	exit 1 unless defined $s;
+	# the "testing speed of ..." banner has no tab-separated value
+	next unless /\t/ && $s =~ /\t/;
+	my ($size, $val) = getvals($_);
+	my ($size2, $val2) = getvals($s);
+	exit 1 if($size != $size2);
+	chomp($val);
+	chomp($val2);
+	print "size: $size\t$val\t$val2\n";
+}
diff --git a/dummyfuncs.c b/dummyfuncs.c
new file mode 100644
index 0000000..6aac7af
--- /dev/null
+++ b/dummyfuncs.c
@@ -0,0 +1,8 @@
+#include <stddef.h>
+
+/*
+ * Intentionally empty. These live in their own object file so the
+ * benchmark's optimizer cannot see that the buffers are never touched.
+ */
+void fillmem(void* mem, size_t size) { }
+int dummy_access(void* mem, size_t size) { return 0; }
diff --git a/memcpy_2_i386.s b/memcpy_2_i386.s
new file mode 100644
index 0000000..fe7c004
--- /dev/null
+++ b/memcpy_2_i386.s
@@ -0,0 +1,31 @@
+# void *mymemcpy(void *dest, const void *src, size_t n) for i386, stack args.
+# For n >= 4: byte copies until %edi is 4-aligned, then rep movsl, then tail.
+.global mymemcpy
+.type mymemcpy,@function
+mymemcpy:
+	push %esi
+	push %edi
+	mov 12(%esp),%edi	# dest
+	mov 16(%esp),%esi	# src
+	mov 20(%esp),%ecx	# n
+	mov %edi,%eax		# return value = dest
+	cmp $4,%ecx
+	jc 1f			# n < 4: skip the alignment loop
+	test $3,%edi
+	jz 1f			# dest already 4-byte aligned
+2:	movsb
+	dec %ecx
+	test $3,%edi
+	jnz 2b
+1:	mov %ecx,%edx		# keep the remaining count for the tail
+	shr $2,%ecx		# number of whole 32 bit words
+	rep
+	movsl
+	and $3,%edx		# 0-3 trailing bytes
+	jz 1f
+2:	movsb
+	dec %edx
+	jnz 2b
+1:	pop %edi
+	pop %esi
+	ret
diff --git a/memcpy_musl_093.c b/memcpy_musl_093.c
new file mode 100644
index 0000000..441e3bb
--- /dev/null
+++ b/memcpy_musl_093.c
@@ -0,0 +1,35 @@
+#include <string.h>
+#include <stdlib.h>
+#include <stdint.h>
+
+#define SS (sizeof(size_t))
+#define ALIGN (sizeof(size_t)-1)
+#define ONES ((size_t)-1/UCHAR_MAX)
+
+/*
+ * Benchmark subject, presumably musl 0.9.3's memcpy going by the file name.
+ * If dest and src have the same offset within a size_t, copy bytes up to
+ * the next size_t boundary, then whole words, then the remaining bytes;
+ * otherwise copy everything byte by byte.
+ */
+void *mymemcpy(void *dest, const void *src, size_t n)
+{
+	unsigned char *d = dest;
+	const unsigned char *s = src;
+
+	if (((uintptr_t)d & ALIGN) != ((uintptr_t)s & ALIGN))
+		goto misaligned;
+
+	for (; ((uintptr_t)d & ALIGN) && n; n--) *d++ = *s++;
+	if (n) {
+		size_t *wd = (void *)d;
+		const size_t *ws = (const void *)s;
+
+		for (; n>=SS; n-=SS) *wd++ = *ws++;
+		d = (void *)wd;
+		s = (const void *)ws;
+misaligned:
+		for (; n; n--) *d++ = *s++;
+	}
+	return dest;
+}
diff --git a/memcpy_test.c b/memcpy_test.c
new file mode 100644
index 0000000..875adf4
--- /dev/null
+++ b/memcpy_test.c
@@ -0,0 +1,219 @@
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <stddef.h>
+
+#include <sys/time.h>
+
+/* Microseconds elapsed since *t according to gettimeofday(). */
+unsigned long long microsecpassed(struct timeval* t) {
+	struct timeval now, diff;
+	gettimeofday(&now, NULL);
+	timersub(&now, t, &diff);
+	/* widen first: tv_sec * 1000000 can overflow a 32 bit time_t */
+	return ((unsigned long long)diff.tv_sec * 1000 * 1000) + diff.tv_usec;
+}
+
+/*
+ * rdtsc(): read the CPU's tick counter. One variant is selected per
+ * architecture; the "#elif 0" branches are disabled alternatives.
+ */
+#if defined(__i386__)
+
+/* static: a plain C99 "inline" emits no definition to link against */
+static inline unsigned long long rdtsc(void) {
+	unsigned int lo, hi;
+	__asm__ volatile (
+		"cpuid \n"
+		"rdtsc"
+		: "=a"(lo), "=d"(hi) /* outputs */
+		: "a"(0) /* inputs */
+		: "%ebx", "%ecx"); /* clobbers*/
+	return ((unsigned long long)lo) | (((unsigned long long)hi) << 32);
+}
+#elif 0
+static inline unsigned long long rdtsc(void) {
+	unsigned long long hi, lo;
+	__asm__ __volatile__(
+		"xorl %%eax, %%eax;\n\t"
+		"push %%ebx;"
+		"cpuid\n\t"
+		::
+		:"%eax", "%ebx", "%ecx", "%edx");
+	__asm__ __volatile__(
+		"rdtsc;"
+		: "=a" (lo), "=d" (hi)
+		::);
+	__asm__ __volatile__(
+		"xorl %%eax, %%eax; cpuid;"
+		"pop %%ebx;"
+		::
+		:"%eax", "%ebx", "%ecx", "%edx");
+
+	return (unsigned long long)hi << 32 | lo;
+}
+
+#elif 0
+static inline unsigned long long rdtsc(void)
+{
+	unsigned long long int x;
+	__asm__ volatile (".byte 0x0f, 0x31" : "=A" (x));
+	return x;
+}
+#elif defined(__x86_64__)
+static inline unsigned long long rdtsc(void) {
+	unsigned long long hi, lo;
+	/*
+	 * cpuid runs before and after the read (presumably as a fence).
+	 * %rbx is listed as a clobber, so the compiler preserves it; a manual
+	 * push/pop split across asm statements would move %rsp unnoticed.
+	 */
+	__asm__ __volatile__(
+		"xorl %%eax, %%eax;\n\t"
+		"cpuid\n\t"
+		::
+		:"%rax", "%rbx", "%rcx", "%rdx");
+	__asm__ __volatile__(
+		"rdtsc;"
+		: "=a" (lo), "=d" (hi)
+		::);
+	__asm__ __volatile__(
+		"xorl %%eax, %%eax; cpuid;"
+		::
+		:"%rax", "%rbx", "%rcx", "%rdx");
+
+	return (unsigned long long)hi << 32 | lo;
+}
+#elif 0
+
+static inline unsigned long long rdtsc(void)
+{
+	unsigned hi, lo;
+	__asm__ __volatile__ ("rdtsc" : "=a"(lo), "=d"(hi));
+	return ( (unsigned long long)lo)|( ((unsigned long long)hi)<<32 );
+}
+
+#elif defined(__powerpc__)
+
+
+static __inline__ unsigned long long rdtsc(void)
+{
+	unsigned long long int result=0;
+	unsigned long int upper, lower,tmp;
+	__asm__ volatile(
+		"0: \n"
+		"\tmftbu %0 \n"
+		"\tmftb %1 \n"
+		"\tmftbu %2 \n"
+		"\tcmpw %2,%0 \n"
+		"\tbne 0b \n"
+		: "=r"(upper),"=r"(lower),"=r"(tmp)
+		);
+	result = upper;
+	result = result<<32;
+	result = result|lower;
+
+	return(result);
+}
+
+#else
+#error "no rdtsc() implementation for this architecture"
+#endif
+
+
+extern void *mymemcpy(void *dest, const void *src, size_t n);
+extern void fillmem(void* mem, size_t size);
+extern int dummy_access(void* mem, size_t size);
+
+
+/* number of test() calls so far; selects the 0-3 byte buffer offset */
+static long long counter = 0;
+
+/*
+ * Time one mymemcpy() call of roughly `size` bytes between two freshly
+ * allocated buffers and return the elapsed rdtsc() ticks. For sizes
+ * above 4 the copy starts 0-3 bytes into both buffers (and is shortened
+ * by the same amount), so successive calls use different offsets.
+ */
+static inline unsigned long long test(size_t size) {
+	char *src, *dst;
+	unsigned long long result;
+	long counter_mod_4;
+
+	if(size > 4)
+		counter_mod_4 = counter % 4;
+	else
+		counter_mod_4 = 0;
+
+	src = malloc(size);
+	dst = malloc(size);
+	if(!src || !dst) {
+		fprintf(stderr, "out of memory allocating 2 x %zu bytes\n", size);
+		abort();
+	}
+
+	fillmem(src, size); // dummy call so that gcc can not guess the mem contents.
+
+	unsigned long long ticka = rdtsc(), tickb;
+	mymemcpy(dst + counter_mod_4, src + counter_mod_4, size - counter_mod_4);
+	tickb = rdtsc();
+	result = tickb - ticka;
+
+	if(dummy_access(dst, size) == 1) abort(); // dummy call so that gcc can not assume mem content is never accessed.
+
+	counter ++;
+	free(src);
+	free(dst);
+	return result;
+}
+
+/*
+ * Benchmark driver: for each size in a fixed list, average the tick count
+ * of repeated test() calls and print "size<TAB>average ticks".
+ */
+int main(int argc, char** argv) {
+#define K(X) (1024 * (X))
+#define ARRAY_SIZE(X) (sizeof(X) / sizeof((X)[0]))
+
+	printf("testing speed of %s\n", FILENAME);
+
+	size_t testsizes[] = {
+		3 ,4 ,5,
+		8,
+		15, 16,
+		23, 24, 25,
+		31, 32, 33,
+		63, 64, 65,
+		95, 96, 97,
+		127, 128, 129,
+		159, 160, 161,
+		191, 192, 193,
+		255, 256, 257,
+		383, 384, 385,
+		511, 512, 513,
+		767, 768, 769,
+		1023, 1024, 1025,
+		1535, 1536, 1537,
+		2048, 4096, 8192,
+		16384, 32768, 65536,
+		K(128), K(256), K(512),
+		K(1024), K(2048), K(4096),
+		K(8192), K(16384), K(32768), K(65536),
+	};
+
+	unsigned long x, y, ymax;
+	unsigned long long res;
+	for (x = 0 ; x < ARRAY_SIZE(testsizes); x++) {
+		res = 0;
+		/* fewer repetitions above 100K, presumably to bound the run time */
+		ymax = testsizes[x] > K(100) ? 100 : 10000;
+		for(y = 0; y < ymax; y++) {
+			res += test(testsizes[x]);
+		}
+		res /= ymax;
+		/* res is unsigned long long, which needs %llu rather than %lu */
+		fprintf(stdout, "%zu\t%llu ticks\n", testsizes[x], res);
+		fflush(stdout);
+	}
+	return 0;
+}
-- 
2.11.4.GIT