-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathbenchmark.cpp
115 lines (87 loc) · 3.3 KB
/
benchmark.cpp
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
//
// (C) 2021, E. Wes Bethel
// benchmark-* harness for running different versions of vector-matrix multiply
// over different problem sizes
//
// usage: no command line arguments
// set problem sizes in the code below
#include <algorithm>
#include <chrono>
#include <iomanip>
#include <iostream>
#include <random>
#include <vector>
#include <cmath> // For: fabs
#include <cblas.h>
#include <string.h>
// external definitions for mmul's
extern void my_dgemv(int, double*, double*, double *);
extern const char* dgemv_desc;
void reference_dgemv(int n, double* A, double* x, double *y) {
double alpha=1.0, beta=1.0;
int lda=n, incx=1, incy=1;
// cblas_dgemm(CblasColMajor, CblasNoTrans, CblasNoTrans, n, n, n, alpha, A, n, B, n, 1., C, n);
cblas_dgemv(CblasRowMajor, CblasNoTrans, n, n, alpha, A, lda, x, incx, beta, y, incy);
}
void fill(double* p, int n) {
static std::random_device rd;
static std::default_random_engine gen(rd());
static std::uniform_real_distribution<> dis(-1.0, 1.0);
for (int i = 0; i < n; ++i)
p[i] = 2 * dis(gen) - 1;
}
bool check_accuracy(double *A, double *Anot, int nvalues)
{
double eps = 1e-5;
for (size_t i = 0; i < nvalues; i++)
{
if (fabsf(A[i] - Anot[i]) > eps) {
return false;
}
}
return true;
}
/* The benchmarking program */
int main(int argc, char** argv)
{
std::cout << "Description:\t" << dgemv_desc << std::endl << std::endl;
std::cout << std::fixed << std::setprecision(5);
// we purposefully run the smallest problem twice so as to "condition"
// BLAS. For timing purposes, ignore the timing of the first problem size
std::vector<int> test_sizes{1024, 1024, 2048, 4096, 8192, 16384};
int n_problems = test_sizes.size();
// preallocate memory buffers for all problems: assume the last number in test_sizes is the largest
// allocate memory for 2 NxN matrices and 4 Nx1 vectors
int max_size = test_sizes[n_problems-1];
std::vector<double> buf(2 * max_size * max_size + 4 * max_size);
double* A = buf.data() + 0;
double* Acopy = A + max_size * max_size;
double* X = Acopy + max_size * max_size;
double* Xcopy = X + max_size;
double* Y = Xcopy + max_size;
double* Ycopy = Y + max_size;
// load up matrics with some random numbers
/* For each test size */
for (int n : test_sizes)
{
printf("Working on problem size N=%d \n", n);
fill(A, n * n);
fill(X, n );
fill(Y, n );
// make copies of A, B, C for use in verification of results
memcpy((void *)Acopy, (const void *)A, sizeof(double)*n*n);
memcpy((void *)Xcopy, (const void *)X, sizeof(double)*n);
memcpy((void *)Ycopy, (const void *)Y, sizeof(double)*n);
// insert start timer code here
// call the method to do the work
my_dgemv(n, A, X, Y);
// insert end timer code here, and print out the elapsed time for this problem size
// now invoke the cblas method to compute the matrix-vector multiplye
reference_dgemv(n, Acopy, Xcopy, Ycopy);
// compare your result with that computed by BLAS
if (check_accuracy(Ycopy, Y, n) == false)
printf(" Error: your answer is not the same as that computed by BLAS. \n");
} // end loop over problem sizes
return 0;
}
// EOF