diff --git a/tests/index.md b/tests/index.md index 16be659..fdbfe2a 100644 --- a/tests/index.md +++ b/tests/index.md @@ -5,6 +5,10 @@ ## 磁盘相关实验 - [使用fio磁盘压测工具进行性能压测分析](tests/disk/test01) +## 内存相关实验 +- [内存访问延时测试](tests/memory/test01) +- [内存访问带宽测试](tests/memory/test02) + ## 网络相关实验 - [PHP单语言的百万连接测试源码](tests/network/test01) - [通过多 IP 达成单机百万连接(支持c、java、php三种语言)](tests/network/test02) diff --git a/tests/memory/test01/main.c b/tests/memory/test01/main.c index 47cdc5f..d11d017 100755 --- a/tests/memory/test01/main.c +++ b/tests/memory/test01/main.c @@ -9,7 +9,6 @@ double data[MAXELEMS]; // 测试用的全局内存数组 -//实验用到的各种函数声明 void init_data(double *data, int n); void run_delay_testing(); double get_seque_access_result(int size, int stride, int type); @@ -105,7 +104,6 @@ double get_seque_access_result(int size, int stride, int type) //本次实验所读取的总存储读取大小 total_accessed_bytes = operations * sizeof(double); - double result = 0; // 获取带宽结果 @@ -131,9 +129,11 @@ void seque_access(int elems, int stride) /* The test function */ volatile double sink; for (i = 0; i < elems; i += stride) { - result += data[i]; + result += data[i]; } - sink = result; /* So compiler doesn't optimize away the loop */ + + //这一行是为了避免编译器把循环给优化掉了 + sink = result; } // get_random_access_result 对存储进行随机访问测试(L1/L2/L3,内存) diff --git a/tests/memory/test02/Makefile b/tests/memory/test02/Makefile new file mode 100755 index 0000000..84adaca --- /dev/null +++ b/tests/memory/test02/Makefile @@ -0,0 +1,10 @@ +CC = gcc +CFLAGS = -Wall -O2 -lrt -D__i386__ + +main: main.c clock.c + $(CC) $(CFLAGS) -o main main.c clock.c + +clean: + rm -f main *.o *~ + + diff --git a/tests/memory/test02/clock.c b/tests/memory/test02/clock.c new file mode 100755 index 0000000..d5eda18 --- /dev/null +++ b/tests/memory/test02/clock.c @@ -0,0 +1,232 @@ +#include +#include +#include +#include +#include "clock.h" +#include + + +/* Routines for using cycle counter */ + +/* Detect whether running on Alpha */ +#ifdef __alpha +#define IS_ALPHA 1 +#else +#define IS_ALPHA 0 +#endif + +/* Detect whether running on x86 */ +#ifdef __i386__ +#define IS_x86 1 +#else +#define IS_x86 0 +#endif + + + + +/* Keep track of most recent reading of cycle counter */ +static unsigned cyc_hi = 0; +static unsigned cyc_lo = 0; + +#if IS_ALPHA +/* Use Alpha cycle timer to compute cycles. Then use + measured clock speed to compute seconds +*/ + +/* + * counterRoutine is an array of Alpha instructions to access + * the Alpha's processor cycle counter. It uses the rpcc + * instruction to access the counter. This 64 bit register is + * divided into two parts. The lower 32 bits are the cycles + * used by the current process. The upper 32 bits are wall + * clock cycles. These instructions read the counter, and + * convert the lower 32 bits into an unsigned int - this is the + * user space counter value. + * NOTE: The counter has a very limited time span. With a + * 450MhZ clock the counter can time things for about 9 + * seconds. */ +static unsigned int counterRoutine[] = +{ + 0x601fc000u, + 0x401f0000u, + 0x6bfa8001u +}; + +/* Cast the above instructions into a function. */ +static unsigned int (*counter)(void)= (void *)counterRoutine; + + +void start_counter() +{ + /* Get cycle counter */ + cyc_hi = 0; + cyc_lo = counter(); +} + +double get_counter() +{ + unsigned ncyc_hi, ncyc_lo; + unsigned hi, lo, borrow; + double result; + ncyc_lo = counter(); + ncyc_hi = 0; + lo = ncyc_lo - cyc_lo; + borrow = lo > ncyc_lo; + hi = ncyc_hi - cyc_hi - borrow; + result = (double) hi * (1 << 30) * 4 + lo; + if (result < 0) { + fprintf(stderr, "Error: Cycle counter returning negative value: %.0f\n", result); + } + return result; +} +#endif /* Alpha */ + +#if IS_x86 +void access_counter(unsigned *hi, unsigned *lo) +{ + /* Get cycle counter */ + asm("rdtsc; movl %%edx,%0; movl %%eax,%1" + : "=r" (*hi), "=r" (*lo) + : /* No input */ + : "%edx", "%eax"); +} + +void start_counter() +{ + access_counter(&cyc_hi, &cyc_lo); +} + +double get_counter() +{ + unsigned ncyc_hi, ncyc_lo; + unsigned hi, lo, borrow; + double result; + /* Get cycle counter */ + access_counter(&ncyc_hi, &ncyc_lo); + /* Do double precision subtraction */ + lo = ncyc_lo - cyc_lo; + borrow = lo > ncyc_lo; + hi = ncyc_hi - cyc_hi - borrow; + result = (double) hi * (1 << 30) * 4 + lo; + if (result < 0) { + fprintf(stderr, "Error: Cycle counter returning negative value: %.0f\n", result); + } + return result; +} +#endif /* x86 */ +struct timespec time1 = {0, 0}; +void start_timer() +{ + clock_gettime(CLOCK_REALTIME, &time1); +} + +long int get_timer() +{ + struct timespec time2 = {0, 0}; + clock_gettime(CLOCK_REALTIME, &time2); + + long int usedMircoSecond = (time2.tv_sec-time1.tv_sec)*1000000000 + (time2.tv_nsec-time1.tv_nsec); + return usedMircoSecond; +} + +double ovhd() +{ + /* Do it twice to eliminate cache effects */ + int i; + double result; + for (i = 0; i < 2; i++) { + start_counter(); + result = get_counter(); + } + return result; +} + +/* Determine clock rate by measuring cycles + elapsed while sleeping for sleeptime seconds */ +double mhz_full(int verbose, int sleeptime) +{ + double rate; + start_counter(); + sleep(sleeptime); + rate = get_counter()/(1e6*sleeptime); + if (verbose) + printf("Processor Clock Rate ~= %.1f MHz\n", rate); + return rate; +} + +/* Version using a default sleeptime */ +double mhz(int verbose) +{ + return mhz_full(verbose, 2); +} + +/** Special counters that compensate for timer interrupt overhead */ + +static double cyc_per_tick = 0.0; + +#define NEVENT 100 +#define THRESHOLD 1000 +#define RECORDTHRESH 3000 + +/* Attempt to see how much time is used by timer interrupt */ +static void callibrate(int verbose) +{ + double oldt; + struct tms t; + clock_t oldc; + int e = 0; + times(&t); + oldc = t.tms_utime; + start_counter(); + oldt = get_counter(); + while (e = THRESHOLD) { + clock_t newc; + times(&t); + newc = t.tms_utime; + if (newc > oldc) { + double cpt = (newt-oldt)/(newc-oldc); + if ((cyc_per_tick == 0.0 || cyc_per_tick > cpt) && cpt > RECORDTHRESH) + cyc_per_tick = cpt; + /* + if (verbose) + printf("Saw event lasting %.0f cycles and %d ticks. Ratio = %f\n", + newt-oldt, (int) (newc-oldc), cpt); + */ + e++; + oldc = newc; + } + oldt = newt; + } + } + if (verbose) + printf("Setting cyc_per_tick to %f\n", cyc_per_tick); +} + +static clock_t start_tick = 0; + +void start_comp_counter() { + struct tms t; + if (cyc_per_tick == 0.0) + callibrate(0); + times(&t); + start_tick = t.tms_utime; + start_counter(); +} + +double get_comp_counter() { + double time = get_counter(); + double ctime; + struct tms t; + clock_t ticks; + times(&t); + ticks = t.tms_utime - start_tick; + ctime = time - ticks*cyc_per_tick; + /* + printf("Measured %.0f cycles. Ticks = %d. Corrected %.0f cycles\n", + time, (int) ticks, ctime); + */ + return ctime; +} diff --git a/tests/memory/test02/clock.h b/tests/memory/test02/clock.h new file mode 100755 index 0000000..32fd4b0 --- /dev/null +++ b/tests/memory/test02/clock.h @@ -0,0 +1,27 @@ +/* Routines for using cycle counter */ + +/* Start the counter */ +void start_counter(); + +/* Get # cycles since counter started */ +double get_counter(); + + +void start_timer(); +long int get_timer(); + + +/* Measure overhead for counter */ +double ovhd(); + +/* Determine clock rate of processor */ +double mhz(int verbose); + +/* Determine clock rate of processor, having more control over accuracy */ +double mhz_full(int verbose, int sleeptime); + +/** Special counters that compensate for timer interrupt overhead */ + +void start_comp_counter(); + +double get_comp_counter(); diff --git a/tests/memory/test02/main.c b/tests/memory/test02/main.c new file mode 100755 index 0000000..5fb52c1 --- /dev/null +++ b/tests/memory/test02/main.c @@ -0,0 +1,227 @@ +#include +#include +#include "clock.h" + +#define MINBYTES (1 << 11) // 内存测试区域从 2KB 开始 +#define MAXBYTES (1 << 26) // 最大到 64 MB +#define MAXSTRIDE 64 // 循环步长从 1 到 64 字节 +#define MAXELEMS MAXBYTES/sizeof(double) + +double data[MAXELEMS]; // 测试用的全局内存数组 + +void init_data(double *data, int n); +void run_width_testing(); +double get_seque_access_result(int size, int stride, int type); +double get_random_access_result(int size, int type); +void seque_access(int elems, int stride); +void random_access(int* random_index_arr, int count); +void create_rand_array(int max, int count, int* pArr); + +int main() +{ + init_data(data, MAXELEMS); + + printf("Band Width (MB/sec)\n"); + run_width_testing(); + printf("\n\n"); + + exit(0); +} + +// init_data 初始化要访问的内存数据 +void init_data(double *data, int n) +{ + int i; + + for (i = 0; i < n; i++) + data[i] = i; +} + +// 运行内存访问带宽测试 +void run_width_testing() +{ + int size; // 测试内存区域大小 + int stride; // 内存区域访问循环步长 + + // 打印内存区域大小头信息 + printf("\t"); + for (size = MAXBYTES; size >= MINBYTES; size >>= 1) { + if (size > (1 << 20)){ + printf("%dm\t", size / (1 << 20)); + }else{ + printf("%dk\t", size / 1024); + } + } + printf("\n"); + + // 多次实验,进行内存顺序访问带宽评估 + // 外层循环控制步长依次从 1 到 64,目的是不同的顺序步长的访问效果差异 + // 内存循环控制数据大小依次从 2KB 开始到 64MB,目的是要保证数据大小依次超过 L1、L2、L3 + for (stride = 1; stride <= MAXSTRIDE; stride=stride+1) { + printf("s%d\t", stride); + for (size = MAXBYTES; size >= MINBYTES; size >>= 1) { + printf("%.1f\t", get_seque_access_result(size, stride, 0)); + } + printf("\n"); + } + + // 多次实验,进行内存随机访问带宽评估 + printf("random\t"); + for (size = MAXBYTES; size >= MINBYTES; size >>= 1) { + printf("%.1f\t", get_random_access_result(size,0)); + } + printf("\n"); +} + +// get_seque_access_result 测试存储访问延迟(L1/L2/L3,内存) +// 参数说明 +// - size: 要测试的数据大小 +// - stride: 步长 +// - type: 0 获取带宽测试结果 +// - 1 获取延时测试结果,单位是 CPU 周期数 +double get_seque_access_result(int size, int stride, int type) +{ + int i; + long int operations; + long int total_accessed_bytes; + long int used_microseconds; + + int samples = 1000; + int elems = size / sizeof(double); + + //循环测试 1000 次,以最大程度减少实验计算结果误差 + start_timer(); + for(i=0; i