forked from MeghanaGudaram/HighPerformanceComputing
-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathAssignment2_ArrayReadWrite.cpp
More file actions
45 lines (40 loc) · 1.18 KB
/
Assignment2_ArrayReadWrite.cpp
File metadata and controls
45 lines (40 loc) · 1.18 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
#include <immintrin.h>
#include <omp.h>
#include <stdio.h>
#include <stdlib.h>

#include <chrono>
#include <ctime>
#include <iostream>

using namespace std;
using namespace std::chrono;
/**
 * Reads `size` floats from `array` using 256-bit AVX loads and returns their
 * lane-wise sum.
 *
 * Lane k of the result holds array[k] + array[k + 8] + array[k + 16] + ...
 * Every loaded vector is folded into the returned accumulator, so the result
 * depends on each load and the reads cannot be discarded as dead code.
 *
 * @param array  Buffer to read. It must be 32-byte aligned because the aligned
 *               load _mm256_load_ps is used on it.
 * @param size   Number of floats to read. It does not have to be a multiple of
 *               8; a value <= 0 reads nothing.
 * @return       The eight partial sums (all zero when nothing was read).
 */
__m256 avx_memoryRead(float* array, int size)
{
    __m256 sum = _mm256_setzero_ps();

    // Full 8-float groups. Comparing the remaining count (instead of i < size)
    // guarantees a load never runs past the end of the buffer.
    int i = 0;
    for (; size - i >= 8; i += 8)
    {
        sum = _mm256_add_ps(sum, _mm256_load_ps(&array[i]));
    }

    // Tail of 1..7 floats: copy it into a zero-padded, aligned temporary so
    // the same aligned load can be used without touching memory beyond `size`.
    const int remaining = size - i;
    if (remaining > 0)
    {
        alignas(32) float padded[8] = {};
        for (int k = 0; k < remaining; ++k)
        {
            padded[k] = array[i + k];
        }
        sum = _mm256_add_ps(sum, _mm256_load_ps(padded));
    }
    return sum;
}
/**
 * Benchmark driver.
 *
 * Inside an OpenMP parallel region each thread allocates its own 32-byte
 * aligned buffer of 1000 floats, fills it with 1, calls avx_memoryRead on it
 * 100 times, and prints the elapsed time, the derived bandwidth in GB/s and
 * lane 0 of the vector returned by the last call.
 *
 * @return 0 on success, EXIT_FAILURE if any thread could not allocate its
 *         buffer.
 */
int main()
{
    // Number of threads whose allocation failed; summed across the team.
    int failures = 0;

    #pragma omp parallel reduction(+ : failures)
    {
        const int size = 1000; // floats per buffer: 4000 bytes, not 4 KiB (4096)
        const int repetitions = 100;
        const int bytes = size * static_cast<int>(sizeof(float));

        // 32-byte alignment is what the aligned AVX loads in avx_memoryRead need.
        float* array = nullptr;
        const int status = posix_memalign(reinterpret_cast<void**>(&array), 32, size * sizeof(float));

        if (status != 0 || array == nullptr)
        {
            // Leaving a parallel region early is not allowed, so just record
            // the failure and skip the measurement on this thread.
            fprintf(stderr, "posix_memalign failed with status %d\n", status);
            ++failures;
        }
        else
        {
            for (int j = 0; j < size; j++)
            {
                array[j] = 1;
            }

            __m256 val = _mm256_set1_ps(0);
            const std::chrono::high_resolution_clock::time_point t1 = std::chrono::high_resolution_clock::now();
            for (int i = 0; i < repetitions; i++)
            {
                val = avx_memoryRead(array, size);
            }
            const std::chrono::high_resolution_clock::time_point t2 = std::chrono::high_resolution_clock::now();

            const std::chrono::duration<double> time_span =
                std::chrono::duration_cast<std::chrono::duration<double>>(t2 - t1);
            const double seconds = time_span.count();
            const double gigabytes_per_second = (bytes * repetitions) / (1000000000 * seconds);

            printf(" Time taken for 100 operations of %d bytes : %lf BW = %lf GB/s\n", bytes, seconds, gigabytes_per_second);
            // Printing the result keeps the measured calls observable.
            printf("value %f\n", val[0]);

            free(array); // posix_memalign memory is released with free()
        }
    }
    return failures == 0 ? 0 : EXIT_FAILURE;
}