mirror of
https://github.com/yanfeizhang/coder-kung-fu.git
synced 2026-05-05 11:44:13 +08:00
feat: 添加存储访问延时测试实验代码
This commit is contained in:
10
tests/memory/test01/Makefile
Executable file
10
tests/memory/test01/Makefile
Executable file
@@ -0,0 +1,10 @@
|
||||
# Build the storage access latency experiment (main.c + clock.c).
CC = gcc

# -D__i386__ forces clock.c to compile its rdtsc-based cycle counter,
# which clock.c only enables when __i386__ is defined.
CFLAGS = -Wall -O2 -D__i386__

# Libraries must follow the sources on the link line, so -lrt lives here
# rather than in CFLAGS.
LDLIBS = -lrt

.PHONY: clean

main: main.c clock.c clock.h
	$(CC) $(CFLAGS) -o main main.c clock.c $(LDLIBS)

clean:
	rm -f main *.o *~
|
||||
|
||||
|
||||
232
tests/memory/test01/clock.c
Executable file
232
tests/memory/test01/clock.c
Executable file
@@ -0,0 +1,232 @@
|
||||
#include <stdio.h>
|
||||
#include <stdlib.h>
|
||||
#include <unistd.h>
|
||||
#include <sys/times.h>
|
||||
#include "clock.h"
|
||||
#include <time.h>
|
||||
|
||||
|
||||
/* Routines for using cycle counter */
|
||||
|
||||
/* Detect whether running on Alpha */
#ifdef __alpha
#define IS_ALPHA 1
#else
#define IS_ALPHA 0
#endif

/*
 * Detect whether running on x86.  64-bit x86 compilers define
 * __x86_64__ instead of __i386__; the rdtsc-based counter below works
 * in both modes, so accept either macro.
 */
#if defined(__i386__) || defined(__x86_64__)
#define IS_x86 1
#else
#define IS_x86 0
#endif
|
||||
|
||||
|
||||
|
||||
|
||||
/*
 * Most recent cycle counter reading taken by start_counter(),
 * stored as separate high and low words.
 */
static unsigned cyc_hi = 0, cyc_lo = 0;
|
||||
|
||||
#if IS_ALPHA
|
||||
/* Use Alpha cycle timer to compute cycles. Then use
|
||||
measured clock speed to compute seconds
|
||||
*/
|
||||
|
||||
/*
 * counterRoutine holds Alpha machine instructions that read the
 * processor cycle counter with the rpcc instruction.  The counter is a
 * 64-bit register split in two: the lower 32 bits count cycles used by
 * the current process, the upper 32 bits count wall clock cycles.  The
 * routine reads the register and converts the lower 32 bits into an
 * unsigned int, i.e. the user space counter value.
 *
 * NOTE: the counter has a very limited time span.  With a 450 MHz
 * clock it can time things for only about 9 seconds.
 */
static unsigned int counterRoutine[] = {
    0x601fc000u, 0x401f0000u, 0x6bfa8001u
};

/* Treat the instruction words above as a callable function. */
static unsigned int (*counter)(void) = (void *) counterRoutine;
|
||||
|
||||
|
||||
void start_counter()
|
||||
{
|
||||
/* Get cycle counter */
|
||||
cyc_hi = 0;
|
||||
cyc_lo = counter();
|
||||
}
|
||||
|
||||
double get_counter()
|
||||
{
|
||||
unsigned ncyc_hi, ncyc_lo;
|
||||
unsigned hi, lo, borrow;
|
||||
double result;
|
||||
ncyc_lo = counter();
|
||||
ncyc_hi = 0;
|
||||
lo = ncyc_lo - cyc_lo;
|
||||
borrow = lo > ncyc_lo;
|
||||
hi = ncyc_hi - cyc_hi - borrow;
|
||||
result = (double) hi * (1 << 30) * 4 + lo;
|
||||
if (result < 0) {
|
||||
fprintf(stderr, "Error: Cycle counter returning negative value: %.0f\n", result);
|
||||
}
|
||||
return result;
|
||||
}
|
||||
#endif /* Alpha */
|
||||
|
||||
#if IS_x86
|
||||
/*
 * Read the x86 time stamp counter.
 *
 * *hi receives the upper 32 bits (EDX) and *lo the lower 32 bits (EAX)
 * produced by the rdtsc instruction.
 *
 * The asm is marked volatile: it has outputs but no inputs, so without
 * the qualifier the optimizer is allowed to treat two reads as the same
 * value or hoist a read out of a polling loop.  The "=a"/"=d"
 * constraints bind the outputs directly to EAX/EDX, so no extra moves
 * or clobber list are needed.
 */
void access_counter(unsigned *hi, unsigned *lo)
{
    unsigned eax, edx;

    __asm__ __volatile__("rdtsc" : "=a" (eax), "=d" (edx));
    *hi = edx;
    *lo = eax;
}
|
||||
|
||||
void start_counter()
|
||||
{
|
||||
access_counter(&cyc_hi, &cyc_lo);
|
||||
}
|
||||
|
||||
double get_counter()
|
||||
{
|
||||
unsigned ncyc_hi, ncyc_lo;
|
||||
unsigned hi, lo, borrow;
|
||||
double result;
|
||||
/* Get cycle counter */
|
||||
access_counter(&ncyc_hi, &ncyc_lo);
|
||||
/* Do double precision subtraction */
|
||||
lo = ncyc_lo - cyc_lo;
|
||||
borrow = lo > ncyc_lo;
|
||||
hi = ncyc_hi - cyc_hi - borrow;
|
||||
result = (double) hi * (1 << 30) * 4 + lo;
|
||||
if (result < 0) {
|
||||
fprintf(stderr, "Error: Cycle counter returning negative value: %.0f\n", result);
|
||||
}
|
||||
return result;
|
||||
}
|
||||
#endif /* x86 */
|
||||
/* Timestamp recorded by start_timer() and consumed by get_timer(). */
struct timespec time1 = {0, 0};

/*
 * Mark the beginning of a timed interval.
 *
 * CLOCK_MONOTONIC is used because the interval must not be affected by
 * wall-clock adjustments made while a measurement is running.
 */
void start_timer(void)
{
    clock_gettime(CLOCK_MONOTONIC, &time1);
}

/*
 * Return the time elapsed since the last start_timer() call, in
 * nanoseconds.
 *
 * The arithmetic is carried out in long long so that the seconds term
 * does not overflow where long is 32 bits; the result is then narrowed
 * to the long int return type callers expect.
 */
long int get_timer(void)
{
    struct timespec time2 = {0, 0};
    long long elapsed_ns;

    clock_gettime(CLOCK_MONOTONIC, &time2);

    elapsed_ns = (long long) (time2.tv_sec - time1.tv_sec) * 1000000000LL
               + (long long) (time2.tv_nsec - time1.tv_nsec);
    return (long int) elapsed_ns;
}
|
||||
|
||||
/*
 * Measure the overhead of the counter itself: the cycle count reported
 * by a get_counter() issued immediately after start_counter().
 * The measurement is taken twice and the second value returned, to
 * eliminate cache effects.
 */
double ovhd(void)
{
    double cycles;
    int pass = 0;

    do {
        start_counter();
        cycles = get_counter();
    } while (++pass < 2);

    return cycles;
}
|
||||
|
||||
/*
 * Determine the clock rate by measuring the cycles elapsed while
 * sleeping for sleeptime seconds.
 *
 * verbose   - when non-zero, print the estimated rate to stdout.
 * sleeptime - number of seconds to sleep; must be positive.
 *
 * Returns the estimated rate in MHz, or 0.0 if sleeptime is not
 * positive (a negative value would otherwise be converted to a huge
 * unsigned sleep, and zero would divide by zero).
 */
double mhz_full(int verbose, int sleeptime)
{
    double rate;
    unsigned int remaining;

    if (sleeptime <= 0) {
        fprintf(stderr, "Error: mhz_full needs a positive sleeptime, got %d\n",
                sleeptime);
        return 0.0;
    }

    start_counter();
    /* sleep() returns the unslept seconds when interrupted by a signal;
       keep sleeping so the measured interval matches sleeptime. */
    remaining = (unsigned int) sleeptime;
    while (remaining > 0)
        remaining = sleep(remaining);
    rate = get_counter() / (1e6 * sleeptime);

    if (verbose)
        printf("Processor Clock Rate ~= %.1f MHz\n", rate);
    return rate;
}
|
||||
|
||||
/*
 * Convenience wrapper around mhz_full() that uses a default sleep time
 * of two seconds.
 */
double mhz(int verbose)
{
    const int default_sleeptime = 2;

    return mhz_full(verbose, default_sleeptime);
}
|
||||
|
||||
/** Special counters that compensate for timer interrupt overhead */

/* Calibrated cycles-per-tick ratio; 0.0 means "not calibrated yet". */
static double cyc_per_tick = 0.0;

/* Tuning constants used by callibrate(). */
enum {
    NEVENT = 100,        /* number of tick-advancing events to observe */
    THRESHOLD = 1000,    /* minimum cycle gap before times() is consulted */
    RECORDTHRESH = 3000  /* a ratio must exceed this to be recorded */
};
|
||||
|
||||
/* Attempt to see how much time is used by timer interrupt */
|
||||
static void callibrate(int verbose)
|
||||
{
|
||||
double oldt;
|
||||
struct tms t;
|
||||
clock_t oldc;
|
||||
int e = 0;
|
||||
times(&t);
|
||||
oldc = t.tms_utime;
|
||||
start_counter();
|
||||
oldt = get_counter();
|
||||
while (e <NEVENT) {
|
||||
double newt = get_counter();
|
||||
if (newt-oldt >= THRESHOLD) {
|
||||
clock_t newc;
|
||||
times(&t);
|
||||
newc = t.tms_utime;
|
||||
if (newc > oldc) {
|
||||
double cpt = (newt-oldt)/(newc-oldc);
|
||||
if ((cyc_per_tick == 0.0 || cyc_per_tick > cpt) && cpt > RECORDTHRESH)
|
||||
cyc_per_tick = cpt;
|
||||
/*
|
||||
if (verbose)
|
||||
printf("Saw event lasting %.0f cycles and %d ticks. Ratio = %f\n",
|
||||
newt-oldt, (int) (newc-oldc), cpt);
|
||||
*/
|
||||
e++;
|
||||
oldc = newc;
|
||||
}
|
||||
oldt = newt;
|
||||
}
|
||||
}
|
||||
if (verbose)
|
||||
printf("Setting cyc_per_tick to %f\n", cyc_per_tick);
|
||||
}
|
||||
|
||||
static clock_t start_tick = 0;
|
||||
|
||||
void start_comp_counter() {
|
||||
struct tms t;
|
||||
if (cyc_per_tick == 0.0)
|
||||
callibrate(0);
|
||||
times(&t);
|
||||
start_tick = t.tms_utime;
|
||||
start_counter();
|
||||
}
|
||||
|
||||
double get_comp_counter() {
|
||||
double time = get_counter();
|
||||
double ctime;
|
||||
struct tms t;
|
||||
clock_t ticks;
|
||||
times(&t);
|
||||
ticks = t.tms_utime - start_tick;
|
||||
ctime = time - ticks*cyc_per_tick;
|
||||
/*
|
||||
printf("Measured %.0f cycles. Ticks = %d. Corrected %.0f cycles\n",
|
||||
time, (int) ticks, ctime);
|
||||
*/
|
||||
return ctime;
|
||||
}
|
||||
27
tests/memory/test01/clock.h
Executable file
27
tests/memory/test01/clock.h
Executable file
@@ -0,0 +1,27 @@
|
||||
/* Routines for using cycle counter */
#ifndef CLOCK_H
#define CLOCK_H

/* Start the counter */
void start_counter(void);

/* Get # cycles since counter started */
double get_counter(void);

/* Start the wall-time interval timer */
void start_timer(void);

/* Get the time elapsed since start_timer(), in nanoseconds */
long int get_timer(void);

/* Measure overhead for counter */
double ovhd(void);

/* Determine clock rate of processor */
double mhz(int verbose);

/* Determine clock rate of processor, having more control over accuracy */
double mhz_full(int verbose, int sleeptime);

/** Special counters that compensate for timer interrupt overhead */

void start_comp_counter(void);

double get_comp_counter(void);

#endif /* CLOCK_H */
|
||||
222
tests/memory/test01/main.c
Executable file
222
tests/memory/test01/main.c
Executable file
@@ -0,0 +1,222 @@
|
||||
#include <stdlib.h>
|
||||
#include <stdio.h>
|
||||
#include "clock.h"
|
||||
|
||||
#define MINBYTES (1 << 11)  /* smallest tested region: 2 KB */
#define MAXBYTES (1 << 26)  /* largest tested region: 64 MB */
#define MAXSTRIDE 64        /* strides run from 1 to 64 array elements */
/* Parenthesized so the macro expands safely inside larger expressions. */
#define MAXELEMS (MAXBYTES / sizeof(double))

double data[MAXELEMS];      /* global array traversed by the tests */
|
||||
|
||||
/* Declarations of the functions used by the experiment. */
void init_data(double *data, int n);
void run_delay_testing(void);   /* (void): a real prototype, not "any arguments" */
double get_seque_access_result(int size, int stride, int type);
double get_random_access_result(int size, int type);
void seque_access(int elems, int stride);
void random_access(int *random_index_arr, int count);
void create_rand_array(int max, int count, int *pArr);
|
||||
|
||||
int main()
|
||||
{
|
||||
init_data(data, MAXELEMS);
|
||||
|
||||
printf("Delay (ns)\n");
|
||||
run_delay_testing();
|
||||
printf("\n\n");
|
||||
|
||||
exit(0);
|
||||
}
|
||||
|
||||
/*
 * init_data - initialize the memory that the tests will access.
 *
 * Stores the value i into data[i] for every i in [0, n).
 */
void init_data(double *data, int n)
{
    double *slot = data;
    int value;

    for (value = 0; value < n; value++)
        *slot++ = value;
}
|
||||
|
||||
// 运行内存访问延时测试
|
||||
void run_delay_testing(){
|
||||
int size; // 测试内存区域大小
|
||||
int stride; // 内存区域访问循环步长
|
||||
|
||||
// 打印内存区域大小头信息
|
||||
printf("\t");
|
||||
for (size = MAXBYTES; size >= MINBYTES; size >>= 1) {
|
||||
if (size > (1 << 20)){
|
||||
printf("%dm\t", size / (1 << 20));
|
||||
}else{
|
||||
printf("%dk\t", size / 1024);
|
||||
}
|
||||
}
|
||||
printf("\n");
|
||||
|
||||
// 多次实验,进行内存顺序访问延时评估
|
||||
// 外层循环控制步长依次从 1 到 64,目的是不同的顺序步长的访问效果差异
|
||||
// 内存循环控制数据大小依次从 2KB 开始到 64MB,目的是要保证数据大小依次超过 L1、L2、L3
|
||||
for (stride = 1; stride <= MAXSTRIDE; stride=stride+1) {
|
||||
printf("s%d\t", stride);
|
||||
for (size = MAXBYTES; size >= MINBYTES; size >>= 1) {
|
||||
printf("%.2f\t", get_seque_access_result(size, stride, 1));
|
||||
}
|
||||
printf("\n");
|
||||
}
|
||||
|
||||
// 多次实验,进行内存随机访问延时评估
|
||||
printf("\random\t");
|
||||
for (size = MAXBYTES; size >= MINBYTES; size >>= 1) {
|
||||
printf("%.2f\t", get_random_access_result(size,1));
|
||||
}
|
||||
printf("\n");
|
||||
}
|
||||
|
||||
/*
 * get_seque_access_result - measure strided sequential access over the
 * first `size` bytes of the global data array (L1/L2/L3/memory).
 *
 * size   - size of the tested region, in bytes
 * stride - step between accessed elements, in array elements; must be
 *          positive
 * type   - 0: return bandwidth, in MB/s (bytes read * 1000 / elapsed ns)
 *          1: return latency, in nanoseconds per access
 *
 * Returns 0 for invalid arguments, for any other `type`, or when no
 * time could be measured.
 */
double get_seque_access_result(int size, int stride, int type)
{
    const int samples = 1000;   /* repeat the pass to reduce measurement error */
    int elems;
    int i;
    long long accesses_per_pass;
    long long operations;
    long int elapsed_ns;
    double result = 0;

    /* A non-positive stride would make seque_access() loop forever. */
    if (size <= 0 || stride <= 0) {
        return 0;
    }
    elems = size / (int) sizeof(double);
    if (elems == 0) {
        return 0;
    }

    start_timer();
    for (i = 0; i < samples; i++) {
        seque_access(elems, stride);
    }
    elapsed_ns = get_timer();
    if (elapsed_ns <= 0) {
        return 0;
    }

    /* seque_access() touches indices 0, stride, 2*stride, ... < elems,
       i.e. ceil(elems / stride) reads per pass.  long long keeps the
       total from overflowing where long is 32 bits. */
    accesses_per_pass = ((long long) elems + stride - 1) / stride;
    operations = samples * accesses_per_pass;

    if (0 == type) {
        /* MB/s = (bytes / 1e6) / (ns / 1e9) = bytes * 1000 / ns */
        result = (double) operations * sizeof(double) * 1000 / elapsed_ns;
    } else if (1 == type) {
        result = (double) elapsed_ns / operations;
    }

    return result;
}
|
||||
|
||||
// 内存按照一定的步长进行顺序访问
|
||||
void seque_access(int elems, int stride) /* The test function */
|
||||
{
|
||||
int i;
|
||||
double result = 0.0;
|
||||
volatile double sink;
|
||||
|
||||
for (i = 0; i < elems; i += stride) {
|
||||
result += data[i];
|
||||
}
|
||||
sink = result; /* So compiler doesn't optimize away the loop */
|
||||
}
|
||||
|
||||
/*
 * get_random_access_result - measure random access over the first
 * `size` bytes of the global data array (L1/L2/L3/memory).
 *
 * size - size of the tested region, in bytes
 * type - 0: return bandwidth, in MB/s (bytes read * 1000 / elapsed ns)
 *        1: return latency, in nanoseconds per access
 *
 * Returns 0 for invalid arguments, for any other `type`, when the index
 * buffer cannot be allocated, or when no time could be measured.
 *
 * The latency is reported in nanoseconds, matching the sequential test
 * and the "Delay (ns)" table title.  The former expression truncated
 * the quotient with integer division and then scaled it by a
 * hard-coded 2.4.
 */
double get_random_access_result(int size, int type)
{
    const int samples = 300;    /* repeat the pass to reduce measurement error */
    int elems;
    int access_count;
    int i;
    int *random_access_arr;
    long long operations;
    long int elapsed_ns;
    double result = 0;

    if (size <= 0) {
        return 0;
    }
    elems = size / (int) sizeof(double);
    access_count = elems;
    if (access_count == 0) {
        return 0;
    }

    /* Prepare the random indices before timing starts.  Every slot is
       written by create_rand_array(), so no pre-clearing is needed. */
    random_access_arr = malloc((size_t) access_count * sizeof *random_access_arr);
    if (random_access_arr == NULL) {
        fprintf(stderr, "Error: cannot allocate %d random indices\n", access_count);
        return 0;
    }
    create_rand_array(elems, access_count, random_access_arr);

    start_timer();
    for (i = 0; i < samples; i++) {
        random_access(random_access_arr, access_count);
    }
    elapsed_ns = get_timer();

    /* The buffer used to be leaked on every call. */
    free(random_access_arr);

    if (elapsed_ns <= 0) {
        return 0;
    }

    /* Total number of reads performed; long long avoids overflow where
       long is 32 bits. */
    operations = (long long) samples * access_count;

    if (0 == type) {
        /* MB/s = (bytes / 1e6) / (ns / 1e9) = bytes * 1000 / ns */
        result = (double) operations * sizeof(double) * 1000 / elapsed_ns;
    } else if (1 == type) {
        result = (double) elapsed_ns / operations;
    }

    return result;
}
|
||||
|
||||
/*
 * Fill pArr[0..count-1] with pseudo-random array indices in [0, max),
 * prepared ahead of time for the random access test.
 *
 * rand() can return RAND_MAX itself, so the product is divided by
 * RAND_MAX + 1; dividing by RAND_MAX would occasionally yield `max`,
 * one past the last valid index.  long long keeps the product from
 * overflowing where long is 32 bits.
 */
void create_rand_array(int max, int count, int* pArr)
{
    const long long range = (long long) RAND_MAX + 1;
    int i;

    for (i = 0; i < count; i++) {
        pArr[i] = (int) ((long long) rand() * max / range);
    }
}
|
||||
|
||||
// random_access 按照指定的随机下标对数据进行随机访问
|
||||
void random_access(int* random_index_arr, int count) /* The test function */
|
||||
{
|
||||
int i;
|
||||
double result = 0.0;
|
||||
volatile double sink;
|
||||
|
||||
for (i = 0; i < count; i++) {
|
||||
result += data[*(random_index_arr+i)];
|
||||
}
|
||||
|
||||
//这一行是为了避免编译器把循环给优化掉了
|
||||
sink = result;
|
||||
}
|
||||
|
||||
|
||||
Reference in New Issue
Block a user