/*
 * File: openmp-mm_mult.c
 * Author: Denis M.
 *
 * A very simple OpenMP code for matrix-matrix multiplication.
 *
 * Created on November 4, 2012, 5:03 PM
 */
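/*
 * Example build and run (a sketch, assuming GCC with OpenMP support; the
 * executable name and the sizes passed below are only illustrative):
 *
 *   gcc -O2 -fopenmp openmp-mm_mult.c -o mm_mult
 *   export OMP_NUM_THREADS=4
 *   export OMP_SCHEDULE="dynamic,20"   # read by schedule(runtime) below
 *   ./mm_mult 100 1000 100
 */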
// Basic Includes
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
// OpenMP Include
#include <omp.h>
int main(int argc, char *argv[])
{
    short iN, fN, incN;
    int nprocs = 0, iam = 0, mat_size, i, j, k;
    // omp_get_wtime() returns double, so the timing variables must be double.
    double time, start, finish;
    double *mat1, *mat2, *sol, mflops;

    if(argc<4) {
        printf("\n\nUSAGE: %s size_initial size_final size_increment\n\n",argv[0]);
        return -1;
    }
    iN=atoi(argv[1]);
    fN=atoi(argv[2]);
    incN=atoi(argv[3]);
    for(mat_size=iN; mat_size<=fN; mat_size+=incN) {
        int mat_size2 = mat_size * mat_size;
        // Allocating memory for the three matrices.
        mat1 = (double *) malloc(sizeof(double)*mat_size2);
        mat2 = (double *) malloc(sizeof(double)*mat_size2);
        sol = (double *) malloc(sizeof(double)*mat_size2);
        if(mat1==NULL || mat2==NULL || sol==NULL) {
            printf("\nError in Matrix allocation. Ask Morpheus.\n");
            return -1;
        }
        // Generating random values between 0 and 1 for mat1 and mat2.
        // About sol, we will simply fill it with zeroes.
        // rand() is not thread-safe, so this initialization stays serial.
        for (i = 0; i < mat_size2; i++) {
            mat1[i] = (double)rand()/RAND_MAX;
            mat2[i] = (double)rand()/RAND_MAX;
            sol[i] = 0;
        }
        // note: nprocs is shared because we need it for later use.
        #pragma omp parallel shared(nprocs) private(iam)
        {
            iam=omp_get_thread_num();
            if(iam==0)
                nprocs=omp_get_num_threads();
        }
        // Clock START.
        start = omp_get_wtime();
        // This block contains the matrix multiplication code.
        {
            // Where the magic begins. The inner loop indices j and k must be
            // private, otherwise the threads would race on them.
            #pragma omp parallel for schedule(runtime) private(j, k)
            for (i = 0; i < mat_size; i++) {
                // This (long x) hoists the row offset out of the inner loops.
                long x = i*mat_size;
                for (j = 0; j < mat_size; j++) {
                    // Accumulating into a per-thread scalar keeps the write to
                    // sol[] out of the inner loop.
                    double temp = 0;
                    for (k = 0; k < mat_size; k++)
                        temp += mat1[x+k] * mat2[k*mat_size+j];
                    sol[x+j] = temp;
                }
            }
        }
        // Clock STOP.
        finish = omp_get_wtime();
        time = finish - start;
        if(time==0.) {
            printf("\nNot enough precision.\n");
        }
        else {
            // mflops = (floating point operations / time) / 1,000,000
            // Each of the mat_size^2 result elements needs mat_size multiplies
            // and mat_size adds, so operations = 2 * mat_size^3.
            mflops = ((double)mat_size2*(2.*(double)mat_size)/time)/1000000.;
            printf("\n>>> Threads = %d\t\tSize = %d\t\tSeconds = %.6lf", nprocs, mat_size, time);
            printf("\n> Mflops = %.6f\t\tMflops/thread = %.6f",mflops,mflops/nprocs);
            printf("\n> Precision omp_get_wtick = %lf\n",omp_get_wtick());
        }
        // Free memory like a boss.
        free(mat1);
        free(mat2);
        free(sol);
    }
    return EXIT_SUCCESS;
}