feat: 添加存储访问延时测试实验代码

This commit is contained in:
yanfeizhang
2022-05-08 21:36:51 +08:00
parent abff7ab5ae
commit 6a25cde2e1
4 changed files with 491 additions and 0 deletions

10
tests/memory/test01/Makefile Executable file
View File

@@ -0,0 +1,10 @@
# Build the memory-latency benchmark from main.c and clock.c.
CC = gcc
# -D__i386__ forces clock.c to compile its x86 rdtsc cycle-counter code,
# which is selected by that macro.
CFLAGS = -Wall -O2 -D__i386__
# Libraries go after the sources on the link line so that the linker can
# resolve clock_gettime() from librt on toolchains that still need it.
LDLIBS = -lrt

.PHONY: clean

# clock.h is listed so that a header change triggers a rebuild.
main: main.c clock.c clock.h
	$(CC) $(CFLAGS) -o main main.c clock.c $(LDLIBS)

clean:
	rm -f main *.o *~

232
tests/memory/test01/clock.c Executable file
View File

@@ -0,0 +1,232 @@
#include <stdio.h>
#include <stdlib.h>
#include <unistd.h>
#include <sys/times.h>
#include "clock.h"
#include <time.h>
/* Routines for using cycle counter */

/* Detect whether running on Alpha */
#ifdef __alpha
#define IS_ALPHA 1
#else
#define IS_ALPHA 0
#endif

/*
 * Detect whether running on x86.  The rdtsc instruction used by the x86
 * code path exists on 64-bit x86 as well, so __x86_64__ is accepted in
 * addition to __i386__; builds that define __i386__ by hand keep working.
 */
#if defined(__i386__) || defined(__x86_64__)
#define IS_x86 1
#else
#define IS_x86 0
#endif

/* Most recent reading of the cycle counter: high and low 32-bit words */
static unsigned cyc_hi = 0;
static unsigned cyc_lo = 0;
#if IS_ALPHA
/*
 * Alpha implementation: use the Alpha cycle timer to count cycles.
 * Callers combine the count with a measured clock speed to get seconds.
 */
/*
 * counterRoutine is an array of Alpha instructions to access
 * the Alpha's processor cycle counter. It uses the rpcc
 * instruction to access the counter. This 64 bit register is
 * divided into two parts. The lower 32 bits are the cycles
 * used by the current process. The upper 32 bits are wall
 * clock cycles. These instructions read the counter, and
 * convert the lower 32 bits into an unsigned int - this is the
 * user space counter value.
 * NOTE: The counter has a very limited time span. With a
 * 450 MHz clock the counter can time things for about 9
 * seconds.
 */
static unsigned int counterRoutine[] =
{
0x601fc000u,
0x401f0000u,
0x6bfa8001u
};
/*
 * Cast the above instructions into a function.
 * NOTE(review): calling through this pointer executes the contents of a
 * data array. That presumably requires the array's memory to be executable
 * and relies on an object-pointer to function-pointer conversion that
 * ISO C does not define -- confirm on the target toolchain.
 */
static unsigned int (*counter)(void)= (void *)counterRoutine;
/*
 * Begin a measurement (Alpha): store the current counter reading as the
 * baseline that get_counter() subtracts from.
 */
void start_counter()
{
    /* Only the low word comes from the counter; the high word is always zero. */
    cyc_lo = counter();
    cyc_hi = 0;
}
double get_counter()
{
unsigned ncyc_hi, ncyc_lo;
unsigned hi, lo, borrow;
double result;
ncyc_lo = counter();
ncyc_hi = 0;
lo = ncyc_lo - cyc_lo;
borrow = lo > ncyc_lo;
hi = ncyc_hi - cyc_hi - borrow;
result = (double) hi * (1 << 30) * 4 + lo;
if (result < 0) {
fprintf(stderr, "Error: Cycle counter returning negative value: %.0f\n", result);
}
return result;
}
#endif /* Alpha */
#if IS_x86
/*
 * Read the x86 time-stamp counter.
 *
 *   hi - receives the upper 32 bits of the counter (from EDX)
 *   lo - receives the lower 32 bits of the counter (from EAX)
 *
 * The asm statement is marked volatile: it has outputs but no inputs, so
 * without the qualifier the compiler may treat two reads as the same
 * value, merge them or move them, which defeats the timing. Binding the
 * outputs directly to EAX/EDX ("=a"/"=d") removes the need for explicit
 * mov instructions and clobbers.
 */
void access_counter(unsigned *hi, unsigned *lo)
{
    unsigned high_word;
    unsigned low_word;

    __asm__ __volatile__("rdtsc" : "=a" (low_word), "=d" (high_word));

    *hi = high_word;
    *lo = low_word;
}
/*
 * Begin a measurement (x86): store the current cycle-counter reading as
 * the baseline that get_counter() subtracts from.
 */
void start_counter()
{
    unsigned hi_now;
    unsigned lo_now;

    access_counter(&hi_now, &lo_now);
    cyc_hi = hi_now;
    cyc_lo = lo_now;
}
/*
 * Return the number of cycles elapsed since start_counter() (x86).
 *
 * The two 32-bit halves of each reading are joined into one 64-bit value
 * and subtracted. Unsigned arithmetic wraps modulo 2^64, so no manual
 * borrow handling is needed. The difference of two unsigned values can
 * never be negative, so the former "negative value" diagnostic was
 * unreachable and has been removed.
 */
double get_counter()
{
    unsigned ncyc_hi;
    unsigned ncyc_lo;
    unsigned long long start_cycles;
    unsigned long long now_cycles;

    /* Get cycle counter */
    access_counter(&ncyc_hi, &ncyc_lo);

    start_cycles = ((unsigned long long) cyc_hi << 32) | cyc_lo;
    now_cycles = ((unsigned long long) ncyc_hi << 32) | ncyc_lo;

    return (double) (now_cycles - start_cycles);
}
#endif /* x86 */
struct timespec time1 = {0, 0};
void start_timer()
{
clock_gettime(CLOCK_REALTIME, &time1);
}
long int get_timer()
{
struct timespec time2 = {0, 0};
clock_gettime(CLOCK_REALTIME, &time2);
long int usedMircoSecond = (time2.tv_sec-time1.tv_sec)*1000000000 + (time2.tv_nsec-time1.tv_nsec);
return usedMircoSecond;
}
/*
 * Measure the cost, in cycles, of a back-to-back start_counter() /
 * get_counter() pair. The pair is executed twice and only the second
 * reading is returned, to eliminate cache effects.
 */
double ovhd()
{
    double cycles;

    /* First pass: its reading is discarded. */
    start_counter();
    cycles = get_counter();

    /* Second pass: this is the reading reported to the caller. */
    start_counter();
    cycles = get_counter();

    return cycles;
}
/*
 * Estimate the processor clock rate in MHz by counting the cycles that
 * elapse while the process sleeps.
 *
 *   verbose   - when non-zero, print the estimate to stdout
 *   sleeptime - number of seconds to sleep while counting
 *
 * Returns cycles / (1e6 * sleeptime).
 */
double mhz_full(int verbose, int sleeptime)
{
    double cycles;
    double mhz_rate;

    start_counter();
    sleep(sleeptime);
    cycles = get_counter();

    mhz_rate = cycles / (1e6 * sleeptime);
    if (verbose) {
        printf("Processor Clock Rate ~= %.1f MHz\n", mhz_rate);
    }
    return mhz_rate;
}
/*
 * Estimate the processor clock rate in MHz using the default sleep
 * interval of two seconds; see mhz_full() for the meaning of verbose.
 */
double mhz(int verbose)
{
    const int default_sleep_seconds = 2;

    return mhz_full(verbose, default_sleep_seconds);
}
/** Special counters that compensate for timer interrupt overhead */
/* Cycles attributed to one tick of tms_utime; 0.0 means "not calibrated yet" */
static double cyc_per_tick = 0.0;
/* Number of tick-advance events to observe before calibration stops */
#define NEVENT 100
/* Minimum cycle gap since the last checkpoint before times() is consulted */
#define THRESHOLD 1000
/* A cycles-per-tick ratio must exceed this value to be recorded */
#define RECORDTHRESH 3000
/*
 * Attempt to see how much time is used by timer interrupt.
 *
 * Spins reading the cycle counter. Whenever at least THRESHOLD cycles
 * have passed since the last checkpoint, it reads the process user time
 * via times(). If the user time has advanced, the ratio
 * (cycle gap) / (tick gap) is computed, and cyc_per_tick is set to the
 * smallest such ratio seen that is greater than RECORDTHRESH. The loop
 * ends after NEVENT tick advances have been observed.
 *
 *   verbose - when non-zero, print the final cyc_per_tick value
 */
static void callibrate(int verbose)
{
double oldt;
struct tms t;
clock_t oldc;
int e = 0;
/* Baseline user-time tick count and cycle reading */
times(&t);
oldc = t.tms_utime;
start_counter();
oldt = get_counter();
while (e <NEVENT) {
double newt = get_counter();
if (newt-oldt >= THRESHOLD) {
clock_t newc;
times(&t);
newc = t.tms_utime;
if (newc > oldc) {
double cpt = (newt-oldt)/(newc-oldc);
/* Keep the minimum qualifying ratio; the first qualifying one seeds it */
if ((cyc_per_tick == 0.0 || cyc_per_tick > cpt) && cpt > RECORDTHRESH)
cyc_per_tick = cpt;
/*
if (verbose)
printf("Saw event lasting %.0f cycles and %d ticks. Ratio = %f\n",
newt-oldt, (int) (newc-oldc), cpt);
*/
e++;
oldc = newc;
}
/* The cycle checkpoint moves only when the THRESHOLD gap was reached */
oldt = newt;
}
}
if (verbose)
printf("Setting cyc_per_tick to %f\n", cyc_per_tick);
}
/* User-time tick count recorded when the compensated counter was started */
static clock_t start_tick = 0;

/*
 * Start the compensated cycle counter. Calibration runs on first use
 * (while cyc_per_tick is still 0.0). The current user-time tick count is
 * then recorded and the raw cycle counter is started.
 */
void start_comp_counter() {
    struct tms usage;

    if (cyc_per_tick == 0.0) {
        callibrate(0);
    }

    times(&usage);
    start_tick = usage.tms_utime;
    start_counter();
}
/*
 * Return the cycles elapsed since start_comp_counter(), reduced by
 * cyc_per_tick for every user-time tick that passed in the meantime.
 */
double get_comp_counter() {
    struct tms usage;
    clock_t elapsed_ticks;
    double raw_cycles;

    /* Read the cycle counter first, then the tick count. */
    raw_cycles = get_counter();
    times(&usage);
    elapsed_ticks = usage.tms_utime - start_tick;

    return raw_cycles - elapsed_ticks * cyc_per_tick;
}

27
tests/memory/test01/clock.h Executable file
View File

@@ -0,0 +1,27 @@
/* Routines for using cycle counter */
#ifndef CLOCK_H
#define CLOCK_H

/* Start the counter */
void start_counter(void);

/* Get # cycles since counter started */
double get_counter(void);

/* Start the wall-time stopwatch */
void start_timer(void);

/* Get the time elapsed since start_timer(), in nanoseconds */
long int get_timer(void);

/* Measure overhead for counter */
double ovhd(void);

/* Determine clock rate of processor (MHz); prints it when verbose is non-zero */
double mhz(int verbose);

/* Determine clock rate of processor, having more control over accuracy:
   sleeptime is the number of seconds to sleep while counting cycles */
double mhz_full(int verbose, int sleeptime);

/** Special counters that compensate for timer interrupt overhead */
void start_comp_counter(void);
double get_comp_counter(void);

#endif /* CLOCK_H */

222
tests/memory/test01/main.c Executable file
View File

@@ -0,0 +1,222 @@
#include <stdlib.h>
#include <stdio.h>
#include "clock.h"
#define MINBYTES (1 << 11)  // smallest memory region tested: 2 KB
#define MAXBYTES (1 << 26)  // largest memory region tested: 64 MB
#define MAXSTRIDE 64        // strides run from 1 to 64 array elements (doubles)
// Number of doubles in the largest region. Parenthesized so the macro
// expands correctly inside a larger expression.
#define MAXELEMS (MAXBYTES / sizeof(double))
double data[MAXELEMS];      // global array that the benchmarks read from
// Forward declarations of the functions used by the experiment
// Fill the first n elements of data with their own index values
void init_data(double *data, int n);
// Run every experiment and print the result table
void run_delay_testing();
// Time repeated strided passes over the first size bytes of the global array
double get_seque_access_result(int size, int stride, int type);
// Time repeated random-index passes over the first size bytes of the global array
double get_random_access_result(int size, int type);
// One strided pass over the first elems elements of the global array
void seque_access(int elems, int stride);
// One pass reading the global array at each of the count given indices
void random_access(int* random_index_arr, int count);
// Fill pArr with count random indices derived from rand() and scaled by max
void create_rand_array(int max, int count, int* pArr);
/*
 * Program entry point: initialise the test array, print the table title,
 * run the experiments, and finish with two blank lines.
 */
int main()
{
    init_data(data, MAXELEMS);

    printf("Delay (ns)\n");
    run_delay_testing();
    printf("\n\n");

    return 0;
}
// init_data fills the memory that the experiments will read.
//  - data: destination array (this parameter shadows the global array)
//  - n:    number of elements to write; element k receives the value k
void init_data(double *data, int n)
{
    double *cursor = data;
    int idx = 0;

    while (idx < n) {
        *cursor++ = idx;
        ++idx;
    }
}
// Run the memory access latency experiments and print the result table.
//
// Output layout (tab separated):
//   - a header row with the region sizes from MAXBYTES down to MINBYTES
//   - one row per stride (s1 .. s<MAXSTRIDE>) with the sequential results
//   - a final "random" row with the random-access results
void run_delay_testing(){
    int size;    // size of the memory region under test, in bytes
    int stride;  // step between accessed elements

    // Header row: region sizes. Sizes of 1 MB and above are shown in
    // megabytes, so the 1 MB column reads "1m" rather than "1024k".
    printf("\t");
    for (size = MAXBYTES; size >= MINBYTES; size >>= 1) {
        if (size >= (1 << 20)) {
            printf("%dm\t", size / (1 << 20));
        } else {
            printf("%dk\t", size / 1024);
        }
    }
    printf("\n");

    // Sequential access. The outer loop walks the stride from 1 to
    // MAXSTRIDE to show how the step affects the result. The inner loop
    // walks the region size from MAXBYTES down to MINBYTES, so the data
    // set ranges from larger than the caches to small enough to fit in them.
    for (stride = 1; stride <= MAXSTRIDE; stride = stride + 1) {
        printf("s%d\t", stride);
        for (size = MAXBYTES; size >= MINBYTES; size >>= 1) {
            printf("%.2f\t", get_seque_access_result(size, stride, 1));
        }
        printf("\n");
    }

    // Random access. The row label used to be written as "\random", where
    // "\r" is a carriage return, so the output was CR followed by "andom".
    printf("random\t");
    for (size = MAXBYTES; size >= MINBYTES; size >>= 1) {
        printf("%.2f\t", get_random_access_result(size, 1));
    }
    printf("\n");
}
// get_seque_access_result measures strided sequential reads of the global
// array (exercising L1/L2/L3 and main memory depending on size).
// Parameters
//  - size:   number of bytes of the array to walk
//  - stride: step between accessed elements, in array elements
//  - type:   0 returns bandwidth in MB/s (10^6 bytes per second)
//            1 returns the average time per access in nanoseconds
// Returns 0 when stride or size is not positive, when no time elapsed, or
// for any other type value.
double get_seque_access_result(int size, int stride, int type)
{
    const int samples = 1000;  // repeat the pass 1000 times to reduce noise
    int elems = size / sizeof(double);
    int i;
    long int elapsed_ns;       // get_timer() reports nanoseconds
    long int accesses_per_pass;
    long int operations;
    double result = 0;

    // A non-positive stride would make the access loop never terminate.
    if (stride <= 0 || elems <= 0) {
        return 0;
    }

    start_timer();
    for (i = 0; i < samples; i++) {
        seque_access(elems, stride);
    }
    elapsed_ns = get_timer();
    if (elapsed_ns <= 0) {
        return 0;
    }

    // A pass reads indices 0, stride, 2*stride, ... below elems, which is
    // ceil(elems / stride) reads. Dividing samples*elems by stride would
    // undercount whenever stride does not divide elems.
    accesses_per_pass = ((long int) elems + stride - 1) / stride;
    operations = (long int) samples * accesses_per_pass;

    if (0 == type) {
        /* bandwidth = MB / s
                     = (total_bytes / 1000000) / (elapsed_ns / 1000000000)
                     = total_bytes * 1000 / elapsed_ns
           computed in floating point so the quotient is not truncated */
        double total_accessed_bytes = (double) operations * sizeof(double);
        result = total_accessed_bytes * 1000 / elapsed_ns;
    } else if (1 == type) {
        result = (double) elapsed_ns / operations;
    }
    return result;
}
// Walk the global array sequentially with the given step.
//  - elems:  number of leading array elements covered by the walk
//  - stride: step between accessed elements
void seque_access(int elems, int stride) /* The test function */
{
    volatile double sink;
    double sum = 0.0;
    int idx = 0;

    while (idx < elems) {
        sum += data[idx];
        idx += stride;
    }

    /* Store to a volatile so the compiler doesn't optimize away the loop */
    sink = sum;
}
// get_random_access_result measures reads of the global array at random
// indices (exercising L1/L2/L3 and main memory depending on size).
// Parameters
//  - size: number of bytes of the array to draw indices from
//  - type: 0 returns bandwidth in MB/s (10^6 bytes per second)
//          1 returns the average time per access in nanoseconds
// Returns 0 when size is too small to hold one element, when the index
// array cannot be allocated, when no time elapsed, or for any other type.
//
// The latency is reported in nanoseconds, matching the sequential test.
// It was previously an integer quotient multiplied by a hard-coded 2.4.
double get_random_access_result(int size, int type)
{
    const int samples = 300;  // repeat the pass 300 times to reduce noise
    int elems = size / sizeof(double);
    int access_count = elems;
    int i;
    int *random_access_arr;
    long int elapsed_ns;      // get_timer() reports nanoseconds
    long int operations;
    double result = 0;

    if (access_count <= 0) {
        return 0;
    }

    // Prepare the random indices before the timed section starts.
    // create_rand_array() writes every slot, so no zero-fill is needed.
    random_access_arr = malloc((size_t) access_count * sizeof *random_access_arr);
    if (random_access_arr == NULL) {
        fprintf(stderr, "get_random_access_result: out of memory\n");
        return 0;
    }
    create_rand_array(elems, access_count, random_access_arr);

    start_timer();
    for (i = 0; i < samples; i++) {
        random_access(random_access_arr, access_count);
    }
    elapsed_ns = get_timer();

    // The index array is only needed during the timed section.
    free(random_access_arr);

    // Also protects the divisions below.
    if (elapsed_ns <= 0) {
        return 0;
    }

    operations = (long int) samples * (long int) access_count;

    if (0 == type) {
        /* bandwidth = MB / s
                     = (total_bytes / 1000000) / (elapsed_ns / 1000000000)
                     = total_bytes * 1000 / elapsed_ns
           computed in floating point so the quotient is not truncated */
        double total_accessed_bytes = (double) operations * sizeof(double);
        result = total_accessed_bytes * 1000 / elapsed_ns;
    } else if (1 == type) {
        result = (double) elapsed_ns / operations;
    }
    return result;
}
// Prepare, ahead of time, the array indices used by the random access test.
//  - max:   exclusive upper bound; every generated index is in [0, max)
//  - count: number of indices to generate
//  - pArr:  destination array with room for count ints
//
// rand() returns values in [0, RAND_MAX], so the scaling divides by
// RAND_MAX + 1. Dividing by RAND_MAX maps rand() == RAND_MAX to max
// itself, one past the last valid index. The arithmetic is done in
// long long so the product cannot overflow where long is 32 bits.
void create_rand_array(int max, int count, int* pArr)
{
    const long long bucket_count = (long long) RAND_MAX + 1;
    int i;

    for (i = 0; i < count; i++) {
        pArr[i] = (int) ((long long) rand() * max / bucket_count);
    }
}
// random_access reads the global array at each of the supplied indices.
//  - random_index_arr: indices into the global array, visited in order
//  - count:            number of indices to visit
void random_access(int* random_index_arr, int count) /* The test function */
{
    volatile double sink;
    double sum = 0.0;
    int k = 0;

    while (k < count) {
        sum += data[random_index_arr[k]];
        ++k;
    }

    // Store to a volatile so the compiler cannot optimize the loop away.
    sink = sum;
}