feat: 添加存储访问延时测试实验代码

2026-06-30 09:56:07 +08:00 · 2022-05-08 21:36:51 +08:00
parent abff7ab5ae
commit 6a25cde2e1
4 changed files with 491 additions and 0 deletions
--- a/tests/memory/test01/Makefile
+++ b/tests/memory/test01/Makefile
@@ -0,0 +1,10 @@
+CC = gcc
+# -D__i386__ makes clock.c select its x86 rdtsc code path (it tests
+# "#ifdef __i386__"); presumably needed because the build host is x86-64.
+CFLAGS = -Wall -O2 -D__i386__
+# Libraries go after the sources on the link line: linkers that default to
+# --as-needed discard a -l option that precedes the objects needing it.
+LDLIBS = -lrt
+
+.PHONY: clean
+
+main: main.c clock.c clock.h
+	$(CC) $(CFLAGS) -o main main.c clock.c $(LDLIBS)
+
+clean:
+	rm -f main *.o *~
+
+
--- a/tests/memory/test01/clock.c
+++ b/tests/memory/test01/clock.c
@@ -0,0 +1,232 @@
+#include <stdio.h>
+#include <stdlib.h>
+#include <unistd.h>
+#include <sys/times.h>
+#include "clock.h"
+#include <time.h>
+
+
+/* Routines for using cycle counter */
+
+/* Detect whether running on Alpha */
+#if defined(__alpha)
+#define IS_ALPHA 1
+#else
+#define IS_ALPHA 0
+#endif
+
+/* Detect whether running on x86.  x86-64 is accepted as well: the rdtsc
+   based counter below only uses 32-bit register halves, so the same code
+   serves both, and the build no longer has to fake __i386__. */
+#if defined(__i386__) || defined(__x86_64__)
+#define IS_x86 1
+#else
+#define IS_x86 0
+#endif
+
+
+
+
+/* High and low 32-bit words of the cycle-counter sample stored by
+   start_counter(); get_counter() subtracts them from a fresh sample. */
+static unsigned int cyc_hi = 0u;
+static unsigned int cyc_lo = 0u;
+
+#if IS_ALPHA
+/* Use Alpha cycle timer to compute cycles.  Then use
+   measured clock speed to compute seconds 
+*/
+
+/*
+ * counterRoutine holds three Alpha instruction words that read the
+ * processor cycle counter with the rpcc instruction.  The 64-bit
+ * counter register is split in two: the lower 32 bits count cycles
+ * used by the current process, the upper 32 bits count wall-clock
+ * cycles.  The routine reads the register and returns the lower
+ * 32 bits as an unsigned int, i.e. the user-space counter value.
+ *
+ * NOTE: the usable time span is short.  With a 450 MHz clock the
+ * 32-bit value can time intervals of only about 9 seconds.
+ *
+ * NOTE(review): calling into this array requires that its storage be
+ * executable on the target - confirm before relying on it.
+ */
+static unsigned int counterRoutine[] =
+{
+ 0x601fc000u,
+ 0x401f0000u,
+ 0x6bfa8001u
+};
+
+/* Treat the instruction words above as a function taking no arguments
+   and returning the counter's low 32 bits. */
+static unsigned int (*counter)(void)= (void *)counterRoutine;
+
+
+/* Store the current counter reading as the start of a measurement.
+   This variant samples only a low word; the stored high word is zero. */
+void start_counter()
+{
+  cyc_lo = counter();
+  cyc_hi = 0;
+}
+
+/* Return, as a double, the difference between a fresh counter reading and
+   the sample stored by start_counter(), using two-word subtraction. */
+double get_counter()
+{
+  unsigned now_hi = 0;
+  unsigned now_lo = counter();
+  unsigned delta_lo = now_lo - cyc_lo;
+  /* The low-word subtraction wrapped iff the difference exceeds the minuend */
+  unsigned carry = (delta_lo > now_lo) ? 1u : 0u;
+  unsigned delta_hi = now_hi - cyc_hi - carry;
+  /* Recombine as delta_hi * 2^32 + delta_lo */
+  double cycles = (double) delta_hi * 4294967296.0 + delta_lo;
+
+  if (cycles < 0) {
+    fprintf(stderr, "Error: Cycle counter returning negative value: %.0f\n", cycles);
+  }
+  return cycles;
+}
+#endif /* Alpha */
+
+#if IS_x86
+/*
+ * Read the processor time-stamp counter with rdtsc.
+ *
+ *   hi - receives the value rdtsc leaves in EDX (upper 32 bits)
+ *   lo - receives the value rdtsc leaves in EAX (lower 32 bits)
+ *
+ * The statement is volatile because it has outputs but no inputs:
+ * without the qualifier the compiler may treat two reads as the same
+ * computation and merge or hoist them, which would make every measured
+ * interval zero.  The __asm__ spelling also compiles in strict ISO modes.
+ */
+void access_counter(unsigned *hi, unsigned *lo)
+{
+  __asm__ __volatile__("rdtsc"
+      : "=a" (*lo), "=d" (*hi));
+}
+
+/* Sample the cycle counter into cyc_hi/cyc_lo; get_counter() measures
+   elapsed cycles relative to this sample. */
+void start_counter()
+{
+  unsigned *hi_word = &cyc_hi;
+  unsigned *lo_word = &cyc_lo;
+
+  access_counter(hi_word, lo_word);
+}
+
+/* Return, as a double, the number of cycles between the sample stored by
+   start_counter() and a fresh reading of the cycle counter. */
+double get_counter()
+{
+  unsigned now_hi, now_lo;
+  unsigned delta_hi, delta_lo, carry;
+  double cycles;
+
+  access_counter(&now_hi, &now_lo);
+
+  /* Two-word subtraction: the low words wrapped iff the difference is
+     larger than the minuend, in which case one is taken from the high word */
+  delta_lo = now_lo - cyc_lo;
+  carry = (delta_lo > now_lo) ? 1u : 0u;
+  delta_hi = now_hi - cyc_hi - carry;
+
+  /* Recombine as delta_hi * 2^32 + delta_lo */
+  cycles = (double) delta_hi * 4294967296.0 + delta_lo;
+  if (cycles < 0) {
+    fprintf(stderr, "Error: Cycle counter returning negative value: %.0f\n", cycles);
+  }
+  return cycles;
+}
+#endif /* x86 */
+/* Start of the interval being timed; written by start_timer(). */
+struct timespec time1 = {0, 0};
+
+/* Record the current time as the start of the interval.  The monotonic
+   clock is used so that wall-clock adjustments made while a measurement is
+   running cannot shorten, lengthen or negate the interval. */
+void start_timer()
+{
+	clock_gettime(CLOCK_MONOTONIC, &time1);
+}
+
+/* Return the number of nanoseconds elapsed since the last start_timer()
+   call.  The difference is formed in long long so the seconds term cannot
+   overflow during the multiplication; where long is 32 bits wide the
+   returned value is still limited to what a long can hold. */
+long int get_timer()
+{
+	struct timespec now = {0, 0};
+	long long elapsed_ns;
+
+	clock_gettime(CLOCK_MONOTONIC, &now);
+
+	elapsed_ns = (long long)(now.tv_sec - time1.tv_sec) * 1000000000LL
+	           + (long long)(now.tv_nsec - time1.tv_nsec);
+	return (long int)elapsed_ns;
+}
+
+/* Measure the cost, in cycles, of a start_counter()/get_counter() pair.
+   The pair is run twice and only the second reading is returned, so the
+   first run can absorb cache effects. */
+double ovhd()
+{
+  double cycles = 0.0;
+  int pass = 0;
+
+  while (pass < 2) {
+    start_counter();
+    cycles = get_counter();
+    pass++;
+  }
+  return cycles;
+}
+
+/* Estimate the clock rate in MHz: count the cycles that elapse across a
+   sleep of sleeptime seconds and divide by the requested duration.
+   When verbose is non-zero the estimate is also printed. */
+double mhz_full(int verbose, int sleeptime)
+{
+  double cycles;
+  double mhz_rate;
+
+  start_counter();
+  sleep(sleeptime);
+  cycles = get_counter();
+
+  mhz_rate = cycles/(1e6*sleeptime);
+  if (verbose) {
+    printf("Processor Clock Rate ~= %.1f MHz\n", mhz_rate);
+  }
+  return mhz_rate;
+}
+
+/* Clock-rate estimate using the default measurement window of 2 seconds */
+double mhz(int verbose)
+{
+  const int default_sleeptime = 2;
+
+  return mhz_full(verbose, default_sleeptime);
+}
+
+/** Special counters that compensate for timer interrupt overhead */
+
+/* Cycles charged per clock tick; stays 0.0 until callibrate() records a value */
+static double cyc_per_tick = 0.0;
+
+enum {
+  NEVENT = 100,        /* tick changes callibrate() observes before stopping */
+  THRESHOLD = 1000,    /* cycle gap between readings that triggers a times() check */
+  RECORDTHRESH = 3000  /* a cycles-per-tick ratio must exceed this to be recorded */
+};
+
+/* Estimate how many cycles one tick of user CPU time costs.
+   Polls the cycle counter; whenever two successive readings are at least
+   THRESHOLD cycles apart, times() is consulted.  If the user tick count has
+   advanced, the gap divided by the tick delta is a candidate ratio, and the
+   smallest candidate above RECORDTHRESH is kept in cyc_per_tick.  Stops
+   after NEVENT tick advances.  When verbose is non-zero the final value is
+   printed. */
+static void callibrate(int verbose)
+{
+  struct tms usage;
+  clock_t prev_ticks;
+  double prev_cycles;
+  int events = 0;
+
+  times(&usage);
+  prev_ticks = usage.tms_utime;
+  start_counter();
+  prev_cycles = get_counter();
+
+  while (events < NEVENT) {
+    double now_cycles = get_counter();
+    clock_t now_ticks;
+
+    /* Successive readings close together: keep polling */
+    if (now_cycles - prev_cycles < THRESHOLD)
+      continue;
+
+    times(&usage);
+    now_ticks = usage.tms_utime;
+    if (now_ticks > prev_ticks) {
+      double cpt = (now_cycles - prev_cycles) / (now_ticks - prev_ticks);
+
+      if (cpt > RECORDTHRESH && (cyc_per_tick == 0.0 || cpt < cyc_per_tick))
+        cyc_per_tick = cpt;
+      events++;
+      prev_ticks = now_ticks;
+    }
+    prev_cycles = now_cycles;
+  }
+
+  if (verbose) {
+    printf("Setting cyc_per_tick to %f\n", cyc_per_tick);
+  }
+}
+
+/* User-time tick count sampled when the compensated counter was started */
+static clock_t start_tick = 0;
+
+/* Start the compensated counter: calibrate cyc_per_tick on first use,
+   remember the current user tick count, then start the cycle counter. */
+void start_comp_counter() {
+  struct tms usage;
+
+  if (cyc_per_tick == 0.0) {
+    callibrate(0);
+  }
+
+  times(&usage);
+  start_tick = usage.tms_utime;
+
+  start_counter();
+}
+
+/* Return the cycles elapsed since start_comp_counter(), reduced by
+   cyc_per_tick for every user-time tick that passed in between. */
+double get_comp_counter() {
+  struct tms usage;
+  clock_t elapsed_ticks;
+  double raw_cycles = get_counter();
+
+  times(&usage);
+  elapsed_ticks = usage.tms_utime - start_tick;
+
+  return raw_cycles - elapsed_ticks*cyc_per_tick;
+}
--- a/tests/memory/test01/clock.h
+++ b/tests/memory/test01/clock.h
@@ -0,0 +1,27 @@
+/* Routines for using cycle counter */
+#ifndef CLOCK_H
+#define CLOCK_H
+
+/* Start the counter */
+void start_counter(void);
+
+/* Get # cycles since counter started */
+double get_counter(void);
+
+/* Start the nanosecond interval timer */
+void start_timer(void);
+
+/* Get # nanoseconds since the interval timer was started */
+long int get_timer(void);
+
+/* Measure overhead for counter: the cycle count of a back-to-back
+   start_counter()/get_counter() pair */
+double ovhd(void);
+
+/* Determine clock rate of processor in MHz, using a default sleep time;
+   prints the result when verbose is non-zero */
+double mhz(int verbose);
+
+/* Determine clock rate of processor, having more control over accuracy:
+   sleeptime is the length of the measurement window in seconds */
+double mhz_full(int verbose, int sleeptime);
+
+/** Special counters that compensate for timer interrupt overhead */
+
+/* Start the compensated counter */
+void start_comp_counter(void);
+
+/* Get # cycles since the compensated counter started, after the
+   per-tick correction has been subtracted */
+double get_comp_counter(void);
+
+#endif /* CLOCK_H */
--- a/tests/memory/test01/main.c
+++ b/tests/memory/test01/main.c
@@ -0,0 +1,222 @@
+#include <stdlib.h>
+#include <stdio.h>
+#include "clock.h" 
+
+#define MINBYTES (1 << 11)  /* smallest tested region: 2 KB */
+#define MAXBYTES (1 << 26)  /* largest tested region: 64 MB */
+#define MAXSTRIDE 64        /* strides run from 1 to 64 array elements */
+/* Number of doubles in the largest region.  Parenthesised so the macro
+   expands safely inside larger expressions such as x / MAXELEMS. */
+#define MAXELEMS (MAXBYTES / sizeof(double))
+
+double data[MAXELEMS];      /* global array that the tests read from */
+
+/* Forward declarations of the routines used by the experiment */
+
+/* set-up */
+void init_data(double *data, int n);
+void create_rand_array(int max, int count, int *pArr);
+
+/* timed access kernels */
+void seque_access(int elems, int stride);
+void random_access(int *random_index_arr, int count);
+
+/* measurement drivers */
+double get_seque_access_result(int size, int stride, int type);
+double get_random_access_result(int size, int type);
+void run_delay_testing();
+
+/* Entry point: fill the test array, then print the access-delay tables. */
+int main()
+{
+	/* Give every element a defined value before anything is timed */
+	init_data(data, MAXELEMS);
+
+	printf("Delay  (ns)\n");
+	run_delay_testing();
+	printf("\n\n");
+
+	return 0;
+}
+
+/* init_data: store the value i in data[i] for the first n elements. */
+void init_data(double *data, int n)
+{
+	double *cursor = data;
+	int value = 0;
+
+	while (value < n) {
+		*cursor = value;
+		cursor++;
+		value++;
+	}
+}
+
+/*
+ * Run the access-delay experiment and print the results as a
+ * tab-separated table on stdout:
+ *   - a header row with one column per region size, from MAXBYTES down
+ *     to MINBYTES, halving each time;
+ *   - one row per stride (labelled s1 .. s64) with the sequential result
+ *     for every region size;
+ *   - a final row labelled "random" with the random-access result for
+ *     every region size.
+ */
+void run_delay_testing(){
+	int size;        /* size of the tested region, in bytes */
+	int stride;      /* step between accesses, in array elements */
+
+	/* Header row: region sizes */
+	printf("\t");
+	for (size = MAXBYTES; size >= MINBYTES; size >>= 1) {
+		if (size > (1 << 20)){
+			printf("%dm\t", size / (1 << 20));
+		}else{
+			printf("%dk\t", size / 1024);
+		}
+	}
+	printf("\n");
+
+	/* Sequential access: the outer loop varies the stride to show how the
+	   step affects the result; the inner loop varies the region size so
+	   that it spans everything from 64 MB down to 2 KB. */
+	for (stride = 1; stride <= MAXSTRIDE; stride=stride+1) {
+		printf("s%d\t", stride);
+		for (size = MAXBYTES; size >= MINBYTES; size >>= 1) {
+			printf("%.2f\t", get_seque_access_result(size, stride, 1));
+		}
+		printf("\n");
+	}
+
+	/* Random access.  The label used to be written "\random", which the
+	   compiler reads as a carriage return followed by "andom". */
+	printf("random\t");
+	for (size = MAXBYTES; size >= MINBYTES; size >>= 1) {
+		printf("%.2f\t", get_random_access_result(size,1));
+	}
+	printf("\n");
+}
+
+/*
+ * get_seque_access_result: time strided sequential reads of the test array.
+ *
+ * Parameters
+ *   size   - number of bytes of the array to cover
+ *   stride - step between consecutive reads, in elements
+ *   type   - 0: return the bandwidth in MB/s (10^6 bytes per second)
+ *            1: return the average delay per read, in nanoseconds
+ *
+ * Returns 0 when the arguments describe no work (size or stride not
+ * positive, region smaller than one element), when no time could be
+ * measured, or when type is neither 0 nor 1.
+ */
+double get_seque_access_result(int size, int stride, int type)
+{
+	const int samples = 1000;   /* repetitions, to average out noise */
+	int i;
+	int elems;
+	long long accesses_per_pass;
+	long long operations;
+	long long total_accessed_bytes;
+	long int used_nanoseconds;
+	double result = 0;
+
+	if (size <= 0 || stride <= 0) {
+		return 0;
+	}
+	elems = size / sizeof(double);
+	if (elems <= 0) {
+		return 0;
+	}
+
+	start_timer();
+	for (i = 0; i < samples; i++) {
+		seque_access(elems, stride);
+	}
+	used_nanoseconds = get_timer();
+	if (used_nanoseconds <= 0) {
+		return 0;
+	}
+
+	/* One pass reads indices 0, stride, 2*stride, ... below elems, which
+	   is ceil(elems / stride) reads - not the floor. */
+	accesses_per_pass = ((long long)elems + stride - 1) / stride;
+	operations = (long long)samples * accesses_per_pass;
+	total_accessed_bytes = operations * (long long)sizeof(double);
+
+	if (0 == type) {
+		/* MB/s = (bytes / 10^6) / (ns / 10^9) = bytes * 1000 / ns */
+		result = (double)total_accessed_bytes * 1000 / used_nanoseconds;
+	} else if (1 == type) {
+		result = (double)used_nanoseconds / operations;
+	}
+
+	return result;
+}
+
+/* Read data[0], data[stride], data[2*stride], ... for every index below
+   elems and add the values up.  This is the timed kernel. */
+void seque_access(int elems, int stride)
+{
+	volatile double sink;
+	double total = 0.0;
+	int idx = 0;
+
+	while (idx < elems) {
+		total += data[idx];
+		idx += stride;
+	}
+
+	/* Storing the sum in a volatile keeps the compiler from removing the loop */
+	sink = total;
+}
+
+/*
+ * get_random_access_result: time reads of the test array at random indices.
+ *
+ * Parameters
+ *   size - number of bytes of the array the random indices may fall in
+ *   type - 0: return the bandwidth in MB/s (10^6 bytes per second)
+ *          1: return the average delay per read, in nanoseconds
+ *
+ * The delay is reported in nanoseconds, the same unit as
+ * get_seque_access_result(), so that both kinds of row in the printed
+ * table are comparable.  Earlier versions divided in integer arithmetic
+ * (losing the fraction) and then scaled by a hard-coded 2.4.
+ *
+ * Returns 0 when size covers no element, when the index array cannot be
+ * allocated, when no time could be measured, or when type is neither 0
+ * nor 1.
+ */
+double get_random_access_result(int size, int type)
+{
+	const int samples = 300;    /* repetitions, to average out noise */
+	int i;
+	int access_count;           /* reads per pass = elements in the region */
+	int *random_access_arr;
+	long long operations;
+	long long total_accessed_bytes;
+	long int used_nanoseconds;
+	double result = 0;
+
+	if (size <= 0) {
+		return 0;
+	}
+	access_count = size / sizeof(double);
+	if (access_count <= 0) {
+		return 0;
+	}
+
+	/* Build the index list before timing starts so that generating random
+	   numbers is not part of the measurement. */
+	random_access_arr = malloc(access_count * sizeof *random_access_arr);
+	if (NULL == random_access_arr) {
+		fprintf(stderr, "get_random_access_result: cannot allocate %d indices\n",
+			access_count);
+		return 0;
+	}
+	create_rand_array(access_count, access_count, random_access_arr);
+
+	start_timer();
+	for (i = 0; i < samples; i++) {
+		random_access(random_access_arr, access_count);
+	}
+	used_nanoseconds = get_timer();
+
+	/* The index list is only needed while timing */
+	free(random_access_arr);
+
+	if (used_nanoseconds <= 0) {
+		return 0;
+	}
+
+	operations = (long long)samples * (long long)access_count;
+	total_accessed_bytes = operations * (long long)sizeof(double);
+
+	if (0 == type) {
+		/* MB/s = (bytes / 10^6) / (ns / 10^9) = bytes * 1000 / ns */
+		result = (double)total_accessed_bytes * 1000 / used_nanoseconds;
+	} else if (1 == type) {
+		result = (double)used_nanoseconds / operations;
+	}
+
+	return result;
+}
+
+/*
+ * create_rand_array: fill pArr[0 .. count-1] with pseudo-random array
+ * indices taken from rand().
+ *
+ * Parameters
+ *   max   - exclusive upper bound of the indices; expected to be positive
+ *   count - number of entries to write
+ *   pArr  - destination, with room for at least count ints
+ *
+ * Each rand() value is scaled by max / (RAND_MAX + 1), so every result
+ * lies in [0, max - 1].  Dividing by RAND_MAX itself would yield max
+ * whenever rand() returns RAND_MAX, i.e. an index one past the end.  The
+ * product is formed in long long so it cannot overflow where long is
+ * 32 bits wide.
+ */
+void create_rand_array(int max, int count, int* pArr)
+{
+	const long long range = (long long)RAND_MAX + 1;
+	int i;
+
+	for (i = 0; i < count; i++) {
+		pArr[i] = (int)((long long)rand() * max / range);
+	}
+}
+
+/* random_access: read data[] at each of the count indices listed in
+   random_index_arr and add the values up.  This is the timed kernel. */
+void random_access(int* random_index_arr, int count)
+{
+	volatile double sink;
+	double total = 0.0;
+	int pos = 0;
+
+	while (pos < count) {
+		total += data[random_index_arr[pos]];
+		pos++;
+	}
+
+	/* Storing the sum in a volatile keeps the compiler from removing the loop */
+	sink = total;
+}
+
+